{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 聚体判断标准\n",
    "\n",
    "> 暂时无视Cleaned PDB\n",
    "\n",
    "1. 单体：PDB中只有一条蛋白链\n",
    "2. 同聚体：PDB中的一对链属于同一蛋白且覆盖范围接近；属于同一蛋白但覆盖范围不接近的链对则注释掉\n",
    "3. 异聚体：PDB中一对链属于不同蛋白\n",
    "\n",
    "## Clean PDB\n",
    "\n",
    "### Entry level\n",
    "\n",
    "1. PDB中只要存在一条未知蛋白链（或某条链的最佳匹配蛋白的identity不符合阈值），即注释掉该Entry\n",
    "2. PDB中存在UNK/CA-ONLY，即注释掉该Entry\n",
    "3. PDB实验方法/resolution不符合要求，即注释掉该Entry\n",
    "4. SIFTS Mapping中被标记为需delete的map需注释掉\n",
    "5. PDB中存在DNA/RNA杂交链，即注释掉该Entry\n",
    "\n",
    "### Chain level\n",
    "\n",
    "1. PDB链的ATOM_LEN不符合阈值的链需注释掉\n",
    "    * ATOM_LEN = OBSERVED_RES_LEN - OBSERVED_MODIFIED_RES_LEN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "# Put the local Muta3DMaps checkout on sys.path so the `Muta3DMaps.*` imports below can be resolved from it.\n",
    "# NOTE(review): absolute, machine-specific Windows path - adjust it when running on another machine.\n",
    "sys.path.append(r'C:\\GitWorks\\Muta3DMaps')\n",
    "from Muta3DMaps.core.pdbe.decode import ProcessSIFTS, SeqRangeReader, ProcessEntryData\n",
    "from Muta3DMaps.core.uniprot.decode import UniProtFASTA\n",
    "from Muta3DMaps.core.retrieve.fetchFiles import UnsyncFetch\n",
    "from tablib import Dataset\n",
    "import pandas as pd\n",
    "import pyexcel as pe\n",
    "import numpy as np\n",
    "from pprint import pprint\n",
    "import ujson as json\n",
    "from collections import Counter, defaultdict\n",
    "from time import perf_counter\n",
    "from itertools import product, combinations, combinations_with_replacement"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set the class-level `use_existing` flag on UnsyncFetch.\n",
    "# NOTE(review): presumably this makes the fetcher reuse files that already exist locally - confirm in UnsyncFetch.\n",
    "UnsyncFetch.use_existing = True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialise the ProcessSIFTS logger; the recorded run of the next cell shows a 'ProcessSIFTS INFO' line on stderr.\n",
    "ProcessSIFTS.init_logger()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:01<00:00,  3.24it/s]\n",
      "2020-02-22 11:56:18,710 ProcessSIFTS INFO 5 ids downloaded in 1.56s\n"
     ]
    }
   ],
   "source": [
    "# Call ProcessSIFTS.main on the files under the test-data directory and keep the result in `dfrm`.\n",
    "# The directory is named once so the input TSV path and the working folder cannot drift apart.\n",
    "# NOTE(review): absolute, machine-specific Windows path - adjust it when running on another machine.\n",
    "TEST_DATA_DIR = r'C:\\GitWorks\\Muta3DMaps\\Muta3DMaps\\test\\data'\n",
    "dfrm = ProcessSIFTS.main(\n",
    "    filePath=TEST_DATA_DIR + r'\\uniprot_pdb.tsv',\n",
    "    folder=TEST_DATA_DIR,\n",
    "    # UniProt accessions handed to `related_unp`\n",
    "    related_unp={'Q9BQ50', 'P51587', 'P0DP91'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "dfrm_0221.tsv:\n",
       "<table>\n",
       "<thead>\n",
       "<tr><th>UniProt  </th><th>chain_id  </th><th>end                                                                           </th><th style=\"text-align: right;\">  entity_id</th><th>identifier  </th><th style=\"text-align: right;\">  identity</th><th>is_canonical  </th><th>name       </th><th>pdb_id  </th><th>start                                                                       </th><th>struct_asym_id  </th><th>sifts_pdb_range                  </th><th>sifts_unp_range                   </th><th>Entry  </th><th>pdb_GAP_list  </th><th>unp_GAP_list  </th><th>var_list  </th><th>delete  </th><th style=\"text-align: right;\">  var_0_count</th><th style=\"text-align: right;\">  unp_GAP_0_count</th><th style=\"text-align: right;\">  group_info</th><th style=\"text-align: right;\">  sifts_unp_pdb_var</th><th>sifts_range_tage    </th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr><td>P53350   </td><td>A         </td><td>{\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":245}</td><td style=\"text-align: right;\">          1</td><td>PLK1_HUMAN  </td><td style=\"text-align: right;\">     0.988</td><td>True          </td><td>PLK1_HUMAN </td><td>6gy2    </td><td>{\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":7}</td><td>A               </td><td>[[7,245]]                        </td><td>[[365,603]]                       </td><td>P53350 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>P53350   </td><td>B         </td><td>{\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":245}</td><td style=\"text-align: right;\">          1</td><td>PLK1_HUMAN  </td><td style=\"text-align: right;\">     0.988</td><td>True          </td><td>PLK1_HUMAN </td><td>6gy2    </td><td>{\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":7}</td><td>B               </td><td>[[7,245]]                        </td><td>[[365,603]]                       </td><td>P53350 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>P51587   </td><td>C         </td><td>{\"author_residue_number\":210,\"author_insertion_code\":\"\",\"residue_number\":17}  </td><td style=\"text-align: right;\">          2</td><td>BRCA2_HUMAN </td><td style=\"text-align: right;\">     1    </td><td>True          </td><td>BRCA2_HUMAN</td><td>6gy2    </td><td>{\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":1}</td><td>C               </td><td>[[1,17]]                         </td><td>[[194,210]]                       </td><td>P51587 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>P51587   </td><td>D         </td><td>{\"author_residue_number\":210,\"author_insertion_code\":\"\",\"residue_number\":17}  </td><td style=\"text-align: right;\">          2</td><td>BRCA2_HUMAN </td><td style=\"text-align: right;\">     1    </td><td>True          </td><td>BRCA2_HUMAN</td><td>6gy2    </td><td>{\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":1}</td><td>D               </td><td>[[1,17]]                         </td><td>[[194,210]]                       </td><td>P51587 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>P51587   </td><td>I         </td><td>{\"author_residue_number\":2054,\"author_insertion_code\":\"\",\"residue_number\":28} </td><td style=\"text-align: right;\">          2</td><td>BRCA2_HUMAN </td><td style=\"text-align: right;\">     0.893</td><td>True          </td><td>BRCA2_HUMAN</td><td>6hqu    </td><td>{\"author_residue_number\":1226,\"author_insertion_code\":\"\",\"residue_number\":1}</td><td>I               </td><td>[[1,28]]                         </td><td>[[1226,1253]]                     </td><td>P51587 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>P51587   </td><td>J         </td><td>{\"author_residue_number\":2054,\"author_insertion_code\":\"\",\"residue_number\":28} </td><td style=\"text-align: right;\">          2</td><td>BRCA2_HUMAN </td><td style=\"text-align: right;\">     0.893</td><td>True          </td><td>BRCA2_HUMAN</td><td>6hqu    </td><td>{\"author_residue_number\":1226,\"author_insertion_code\":\"\",\"residue_number\":1}</td><td>J               </td><td>[[1,28]]                         </td><td>[[1226,1253]]                     </td><td>P51587 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>P51587   </td><td>K         </td><td>{\"author_residue_number\":2054,\"author_insertion_code\":\"\",\"residue_number\":28} </td><td style=\"text-align: right;\">          2</td><td>BRCA2_HUMAN </td><td style=\"text-align: right;\">     0.893</td><td>True          </td><td>BRCA2_HUMAN</td><td>6hqu    </td><td>{\"author_residue_number\":1226,\"author_insertion_code\":\"\",\"residue_number\":1}</td><td>K               </td><td>[[1,28]]                         </td><td>[[1226,1253]]                     </td><td>P51587 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>P51587   </td><td>L         </td><td>{\"author_residue_number\":2054,\"author_insertion_code\":\"\",\"residue_number\":28} </td><td style=\"text-align: right;\">          2</td><td>BRCA2_HUMAN </td><td style=\"text-align: right;\">     0.893</td><td>True          </td><td>BRCA2_HUMAN</td><td>6hqu    </td><td>{\"author_residue_number\":1226,\"author_insertion_code\":\"\",\"residue_number\":1}</td><td>L               </td><td>[[1,28]]                         </td><td>[[1226,1253]]                     </td><td>P51587 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>P51587   </td><td>M         </td><td>{\"author_residue_number\":2054,\"author_insertion_code\":\"\",\"residue_number\":28} </td><td style=\"text-align: right;\">          2</td><td>BRCA2_HUMAN </td><td style=\"text-align: right;\">     0.893</td><td>True          </td><td>BRCA2_HUMAN</td><td>6hqu    </td><td>{\"author_residue_number\":1226,\"author_insertion_code\":\"\",\"residue_number\":1}</td><td>M               </td><td>[[1,28]]                         </td><td>[[1226,1253]]                     </td><td>P51587 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>P51587   </td><td>N         </td><td>{\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":28} </td><td style=\"text-align: right;\">          2</td><td>BRCA2_HUMAN </td><td style=\"text-align: right;\">     0.893</td><td>True          </td><td>BRCA2_HUMAN</td><td>6hqu    </td><td>{\"author_residue_number\":1226,\"author_insertion_code\":\"\",\"residue_number\":1}</td><td>N               </td><td>[[1,28]]                         </td><td>[[1226,1253]]                     </td><td>P51587 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>P51587   </td><td>O         </td><td>{\"author_residue_number\":2064,\"author_insertion_code\":\"\",\"residue_number\":15} </td><td style=\"text-align: right;\">          3</td><td>BRCA2_HUMAN </td><td style=\"text-align: right;\">     0.909</td><td>True          </td><td>BRCA2_HUMAN</td><td>6hqu    </td><td>{\"author_residue_number\":1230,\"author_insertion_code\":\"\",\"residue_number\":5}</td><td>O               </td><td>[[5,15]]                         </td><td>[[2054,2064]]                     </td><td>P51587 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>Q86YC2   </td><td>A         </td><td>{\"author_residue_number\":1186,\"author_insertion_code\":\"\",\"residue_number\":356}</td><td style=\"text-align: right;\">          1</td><td>PALB2_HUMAN </td><td style=\"text-align: right;\">     1    </td><td>True          </td><td>PALB2_HUMAN</td><td>3eu7    </td><td>{\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":5}</td><td>A               </td><td>[[5,356]]                        </td><td>[[835,1186]]                      </td><td>Q86YC2 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>P51587   </td><td>X         </td><td>{\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":19} </td><td style=\"text-align: right;\">          2</td><td>BRCA2_HUMAN </td><td style=\"text-align: right;\">     1    </td><td>True          </td><td>BRCA2_HUMAN</td><td>3eu7    </td><td>{\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":1}</td><td>B               </td><td>[[1,19]]                         </td><td>[[21,39]]                         </td><td>P51587 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>Q9BQ50   </td><td>A         </td><td>{\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":238}</td><td style=\"text-align: right;\">          1</td><td>TREX2_HUMAN </td><td style=\"text-align: right;\">     1    </td><td>True          </td><td>TREX2_HUMAN</td><td>1y97    </td><td>{\"author_residue_number\":1,\"author_insertion_code\":\"\",\"residue_number\":3}   </td><td>A               </td><td>[[3,238]]                        </td><td>[[1,236]]                         </td><td>Q9BQ50 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>Q9BQ50   </td><td>B         </td><td>{\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":238}</td><td style=\"text-align: right;\">          1</td><td>TREX2_HUMAN </td><td style=\"text-align: right;\">     1    </td><td>True          </td><td>TREX2_HUMAN</td><td>1y97    </td><td>{\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":3}</td><td>B               </td><td>[[3,238]]                        </td><td>[[1,236]]                         </td><td>Q9BQ50 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>Q9BQ50-1 </td><td>A         </td><td>{\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":238}</td><td style=\"text-align: right;\">          1</td><td>TREX2_HUMAN </td><td style=\"text-align: right;\">     0.996</td><td>False         </td><td>TREX2_HUMAN</td><td>1y97    </td><td>{\"author_residue_number\":0,\"author_insertion_code\":\"\",\"residue_number\":2}   </td><td>A               </td><td>[[2,238]]                        </td><td>[[43,279]]                        </td><td>Q9BQ50 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>Q9BQ50-1 </td><td>B         </td><td>{\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":238}</td><td style=\"text-align: right;\">          1</td><td>TREX2_HUMAN </td><td style=\"text-align: right;\">     0.996</td><td>False         </td><td>TREX2_HUMAN</td><td>1y97    </td><td>{\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":2}</td><td>B               </td><td>[[2,238]]                        </td><td>[[43,279]]                        </td><td>Q9BQ50 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>Q06609-3 </td><td>A         </td><td>{\"author_residue_number\":258,\"author_insertion_code\":\"\",\"residue_number\":162} </td><td style=\"text-align: right;\">          1</td><td>RAD51_HUMAN </td><td style=\"text-align: right;\">     1    </td><td>False         </td><td>RAD51_HUMAN</td><td>1n0w    </td><td>{\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":1}</td><td>A               </td><td>[[1,162]]                        </td><td>[[97,258]]                        </td><td>Q06609 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>Q06609-2 </td><td>A         </td><td>{\"author_residue_number\":339,\"author_insertion_code\":\"\",\"residue_number\":243} </td><td style=\"text-align: right;\">          1</td><td>RAD51_HUMAN </td><td style=\"text-align: right;\">     0.753</td><td>False         </td><td>RAD51_HUMAN</td><td>1n0w    </td><td>{\"author_residue_number\":156,\"author_insertion_code\":\"\",\"residue_number\":60}</td><td>A               </td><td>[[7,30],[34,36],[41,51],[60,243]]</td><td>[[19,42],[43,45],[46,56],[57,242]]</td><td>Q06609 </td><td>[3,4,8]       </td><td>[0,0,0]       </td><td>[0,0,0,2] </td><td>False   </td><td style=\"text-align: right;\">            3</td><td style=\"text-align: right;\">                3</td><td style=\"text-align: right;\">           4</td><td style=\"text-align: right;\">                  0</td><td>Insertion & Deletion</td></tr>\n",
       "<tr><td>Q06609   </td><td>A         </td><td>{\"author_residue_number\":339,\"author_insertion_code\":\"\",\"residue_number\":243} </td><td style=\"text-align: right;\">          1</td><td>RAD51_HUMAN </td><td style=\"text-align: right;\">     1    </td><td>True          </td><td>RAD51_HUMAN</td><td>1n0w    </td><td>{\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":1}</td><td>A               </td><td>[[1,243]]                        </td><td>[[97,339]]                        </td><td>Q06609 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>Q06609-4 </td><td>A         </td><td>{\"author_residue_number\":339,\"author_insertion_code\":\"\",\"residue_number\":243} </td><td style=\"text-align: right;\">          1</td><td>RAD51_HUMAN </td><td style=\"text-align: right;\">     1    </td><td>False         </td><td>RAD51_HUMAN</td><td>1n0w    </td><td>{\"author_residue_number\":115,\"author_insertion_code\":\"\",\"residue_number\":19}</td><td>A               </td><td>[[19,243]]                       </td><td>[[116,340]]                       </td><td>Q06609 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "<tr><td>P51587   </td><td>B         </td><td>{\"author_residue_number\":1551,\"author_insertion_code\":\"\",\"residue_number\":35} </td><td style=\"text-align: right;\">          2</td><td>BRCA2_HUMAN </td><td style=\"text-align: right;\">     1    </td><td>True          </td><td>BRCA2_HUMAN</td><td>1n0w    </td><td>{\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":1}</td><td>B               </td><td>[[1,35]]                         </td><td>[[1517,1551]]                     </td><td>P51587 </td><td>[]            </td><td>[]            </td><td>[0]       </td><td>False   </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">                0</td><td style=\"text-align: right;\">           1</td><td style=\"text-align: right;\">                  0</td><td>Safe                </td></tr>\n",
       "</tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "dfrm_0221.tsv:\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| UniProt  | chain_id |                                      end                                       | entity_id | identifier  | identity | is_canonical |    name     | pdb_id |                                    start                                     | struct_asym_id |          sifts_pdb_range          |          sifts_unp_range           | Entry  | pdb_GAP_list | unp_GAP_list | var_list  | delete | var_0_count | unp_GAP_0_count | group_info | sifts_unp_pdb_var |   sifts_range_tage   |\n",
       "+==========+==========+================================================================================+===========+=============+==========+==============+=============+========+==============================================================================+================+===================================+====================================+========+==============+==============+===========+========+=============+=================+============+===================+======================+\n",
       "| P53350   | A        | {\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":245} | 1         | PLK1_HUMAN  | 0.988    | True         | PLK1_HUMAN  | 6gy2   | {\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":7} | A              | [[7,245]]                         | [[365,603]]                        | P53350 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| P53350   | B        | {\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":245} | 1         | PLK1_HUMAN  | 0.988    | True         | PLK1_HUMAN  | 6gy2   | {\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":7} | B              | [[7,245]]                         | [[365,603]]                        | P53350 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| P51587   | C        | {\"author_residue_number\":210,\"author_insertion_code\":\"\",\"residue_number\":17}   | 2         | BRCA2_HUMAN | 1.0      | True         | BRCA2_HUMAN | 6gy2   | {\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":1} | C              | [[1,17]]                          | [[194,210]]                        | P51587 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| P51587   | D        | {\"author_residue_number\":210,\"author_insertion_code\":\"\",\"residue_number\":17}   | 2         | BRCA2_HUMAN | 1.0      | True         | BRCA2_HUMAN | 6gy2   | {\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":1} | D              | [[1,17]]                          | [[194,210]]                        | P51587 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| P51587   | I        | {\"author_residue_number\":2054,\"author_insertion_code\":\"\",\"residue_number\":28}  | 2         | BRCA2_HUMAN | 0.893    | True         | BRCA2_HUMAN | 6hqu   | {\"author_residue_number\":1226,\"author_insertion_code\":\"\",\"residue_number\":1} | I              | [[1,28]]                          | [[1226,1253]]                      | P51587 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| P51587   | J        | {\"author_residue_number\":2054,\"author_insertion_code\":\"\",\"residue_number\":28}  | 2         | BRCA2_HUMAN | 0.893    | True         | BRCA2_HUMAN | 6hqu   | {\"author_residue_number\":1226,\"author_insertion_code\":\"\",\"residue_number\":1} | J              | [[1,28]]                          | [[1226,1253]]                      | P51587 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| P51587   | K        | {\"author_residue_number\":2054,\"author_insertion_code\":\"\",\"residue_number\":28}  | 2         | BRCA2_HUMAN | 0.893    | True         | BRCA2_HUMAN | 6hqu   | {\"author_residue_number\":1226,\"author_insertion_code\":\"\",\"residue_number\":1} | K              | [[1,28]]                          | [[1226,1253]]                      | P51587 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| P51587   | L        | {\"author_residue_number\":2054,\"author_insertion_code\":\"\",\"residue_number\":28}  | 2         | BRCA2_HUMAN | 0.893    | True         | BRCA2_HUMAN | 6hqu   | {\"author_residue_number\":1226,\"author_insertion_code\":\"\",\"residue_number\":1} | L              | [[1,28]]                          | [[1226,1253]]                      | P51587 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| P51587   | M        | {\"author_residue_number\":2054,\"author_insertion_code\":\"\",\"residue_number\":28}  | 2         | BRCA2_HUMAN | 0.893    | True         | BRCA2_HUMAN | 6hqu   | {\"author_residue_number\":1226,\"author_insertion_code\":\"\",\"residue_number\":1} | M              | [[1,28]]                          | [[1226,1253]]                      | P51587 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| P51587   | N        | {\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":28}  | 2         | BRCA2_HUMAN | 0.893    | True         | BRCA2_HUMAN | 6hqu   | {\"author_residue_number\":1226,\"author_insertion_code\":\"\",\"residue_number\":1} | N              | [[1,28]]                          | [[1226,1253]]                      | P51587 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| P51587   | O        | {\"author_residue_number\":2064,\"author_insertion_code\":\"\",\"residue_number\":15}  | 3         | BRCA2_HUMAN | 0.909    | True         | BRCA2_HUMAN | 6hqu   | {\"author_residue_number\":1230,\"author_insertion_code\":\"\",\"residue_number\":5} | O              | [[5,15]]                          | [[2054,2064]]                      | P51587 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| Q86YC2   | A        | {\"author_residue_number\":1186,\"author_insertion_code\":\"\",\"residue_number\":356} | 1         | PALB2_HUMAN | 1.0      | True         | PALB2_HUMAN | 3eu7   | {\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":5} | A              | [[5,356]]                         | [[835,1186]]                       | Q86YC2 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| P51587   | X        | {\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":19}  | 2         | BRCA2_HUMAN | 1.0      | True         | BRCA2_HUMAN | 3eu7   | {\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":1} | B              | [[1,19]]                          | [[21,39]]                          | P51587 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| Q9BQ50   | A        | {\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":238} | 1         | TREX2_HUMAN | 1.0      | True         | TREX2_HUMAN | 1y97   | {\"author_residue_number\":1,\"author_insertion_code\":\"\",\"residue_number\":3}    | A              | [[3,238]]                         | [[1,236]]                          | Q9BQ50 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| Q9BQ50   | B        | {\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":238} | 1         | TREX2_HUMAN | 1.0      | True         | TREX2_HUMAN | 1y97   | {\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":3} | B              | [[3,238]]                         | [[1,236]]                          | Q9BQ50 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| Q9BQ50-1 | A        | {\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":238} | 1         | TREX2_HUMAN | 0.996    | False        | TREX2_HUMAN | 1y97   | {\"author_residue_number\":0,\"author_insertion_code\":\"\",\"residue_number\":2}    | A              | [[2,238]]                         | [[43,279]]                         | Q9BQ50 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| Q9BQ50-1 | B        | {\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":238} | 1         | TREX2_HUMAN | 0.996    | False        | TREX2_HUMAN | 1y97   | {\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":2} | B              | [[2,238]]                         | [[43,279]]                         | Q9BQ50 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| Q06609-3 | A        | {\"author_residue_number\":258,\"author_insertion_code\":\"\",\"residue_number\":162}  | 1         | RAD51_HUMAN | 1.0      | False        | RAD51_HUMAN | 1n0w   | {\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":1} | A              | [[1,162]]                         | [[97,258]]                         | Q06609 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| Q06609-2 | A        | {\"author_residue_number\":339,\"author_insertion_code\":\"\",\"residue_number\":243}  | 1         | RAD51_HUMAN | 0.753    | False        | RAD51_HUMAN | 1n0w   | {\"author_residue_number\":156,\"author_insertion_code\":\"\",\"residue_number\":60} | A              | [[7,30],[34,36],[41,51],[60,243]] | [[19,42],[43,45],[46,56],[57,242]] | Q06609 | [3,4,8]      | [0,0,0]      | [0,0,0,2] | False  | 3           | 3               | 4          | 0                 | Insertion & Deletion |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| Q06609   | A        | {\"author_residue_number\":339,\"author_insertion_code\":\"\",\"residue_number\":243}  | 1         | RAD51_HUMAN | 1.0      | True         | RAD51_HUMAN | 1n0w   | {\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":1} | A              | [[1,243]]                         | [[97,339]]                         | Q06609 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| Q06609-4 | A        | {\"author_residue_number\":339,\"author_insertion_code\":\"\",\"residue_number\":243}  | 1         | RAD51_HUMAN | 1.0      | False        | RAD51_HUMAN | 1n0w   | {\"author_residue_number\":115,\"author_insertion_code\":\"\",\"residue_number\":19} | A              | [[19,243]]                        | [[116,340]]                        | Q06609 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+\n",
       "| P51587   | B        | {\"author_residue_number\":1551,\"author_insertion_code\":\"\",\"residue_number\":35}  | 2         | BRCA2_HUMAN | 1.0      | True         | BRCA2_HUMAN | 1n0w   | {\"author_residue_number\":null,\"author_insertion_code\":\"\",\"residue_number\":1} | B              | [[1,35]]                          | [[1517,1551]]                      | P51587 | []           | []           | [0]       | False  | 1           | 0               | 1          | 0                 | Safe                 |\n",
       "+----------+----------+--------------------------------------------------------------------------------+-----------+-------------+----------+--------------+-------------+--------+------------------------------------------------------------------------------+----------------+-----------------------------------+------------------------------------+--------+--------------+--------------+-----------+--------+-------------+-----------------+------------+-------------------+----------------------+"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# dfrm.to_csv(r'C:\\Users\\Nature\\Downloads\\dfrm_0221.tsv', sep='\\t', index=False)\n",
    "sheet = pe.get_sheet(file_name=r'C:\\Users\\Nature\\Downloads\\dfrm_0221.tsv', name_columns_by_row=0)\n",
    "sheet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(r'C:\\Users\\Nature\\Downloads\\dfrm_0221.tsv', 'r') as fh:\n",
    "    imported_data = Dataset().load(fh, format='tsv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.0006537000000150783\n"
     ]
    }
   ],
   "source": [
    "data = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))))\n",
    "t0 = perf_counter()\n",
    "for i in range(len(sheet)):\n",
    "    cur_dict = data[sheet[i, 'pdb_id']][sheet[i,'Entry']][sheet[i,'entity_id']][sheet[i,'chain_id']][sheet[i,'UniProt']]\n",
    "    for key in ('identity', 'sifts_unp_range', 'sifts_pdb_range', 'sifts_range_tage', 'delete'):\n",
    "        cur_dict[key] = sheet[i,key]\n",
    "print(perf_counter()-t0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.00019730000008166826\n"
     ]
    }
   ],
   "source": [
    "# BEST\n",
    "records = pe.get_records(file_name=r'C:\\Users\\Nature\\Downloads\\dfrm_0221.tsv', name_columns_by_row=0)\n",
    "data = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))))\n",
    "t0 = perf_counter()\n",
    "for record in records:\n",
    "    cur_dict = data[record['pdb_id']][record['Entry']][record['entity_id']][record['chain_id']][record['UniProt']]\n",
    "    for key in ('identity', 'sifts_unp_range', 'sifts_pdb_range', 'sifts_range_tage', 'delete'):\n",
    "        cur_dict[key] = record[key]\n",
    "print(perf_counter()-t0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.0006253999999898952\n"
     ]
    }
   ],
   "source": [
    "data = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))))\n",
    "t0 = perf_counter()\n",
    "for record in imported_data.dict:\n",
    "    cur_dict = data[record['pdb_id']][record['Entry']][record['entity_id']][record['chain_id']][record['UniProt']]\n",
    "    for key in ('identity', 'sifts_unp_range', 'sifts_pdb_range', 'sifts_range_tage', 'delete'):\n",
    "        cur_dict[key] = sheet[i,key]\n",
    "print(perf_counter()-t0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def traverseOligomeric(data):\n",
    "    for pdb_id, entries in data.items():\n",
    "        len(entries) # == 1 -> 非异聚体\n",
    "        for entry_id, entities in entries.items():\n",
    "            len(entities) # == 1 且非异聚体 -> pure单一蛋白; != 1 且非异聚体 -> pure单一蛋白，但是可能覆盖范围差异\n",
    "            for entity_id, chains in entities.items():\n",
    "                len(chains) # == 1 且非"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "def validateSIFTS(data):\n",
    "    '''\n",
    "    验证同一Entry下, 每个Entity每条链下的Isoform情况相同\n",
    "    '''\n",
    "    for pdb_id, entries in data.items():\n",
    "        for entry_id, entities in entries.items():\n",
    "            for entity_id, chains in entities.items():\n",
    "                isoformSet = set(tuple(isoforms.keys()) for chain_id, isoforms in chains.items())\n",
    "                # print(isoformSet)\n",
    "                if len(isoformSet) > 1:\n",
    "                    raise ValueError(f'{pdb_id}_{entry_id}_{entity_id}: {isoformSet}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [],
   "source": [
    "validateSIFTS(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [],
   "source": [
    "def traverseSIFTS(data):\n",
    "    '''\n",
    "    同一PDB下, 内任意两条链是否属于同一蛋白, 且覆盖范围是否相似\n",
    "    '''\n",
    "    for pdb_id, entries in data.items():\n",
    "        for entry_id, entities in entries.items():\n",
    "            # 此层下所有链都同属一蛋白，覆盖范围是否一致暂未确认\n",
    "            for entity_id_l, entity_id_r in combinations_with_replacement(entities.keys(), 2):\n",
    "                if entity_id_l != entity_id_r:\n",
    "                    chain_ids_l = entities[entity_id_l].keys()\n",
    "                    chain_ids_r = entities[entity_id_r].keys()\n",
    "                    for res in product(chain_ids_l, chain_ids_r):\n",
    "                        yield pdb_id, 'ho?', res\n",
    "                else:\n",
    "                    chain_ids = entities[entity_id_l].keys()\n",
    "                    for res in combinations(chain_ids, 2):\n",
    "                        yield pdb_id, 'ho', res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('6gy2', 'ho', ('A', 'B'))\n",
      "('6gy2', 'ho', ('C', 'D'))\n",
      "('6hqu', 'ho', ('I', 'J'))\n",
      "('6hqu', 'ho', ('I', 'K'))\n",
      "('6hqu', 'ho', ('I', 'L'))\n",
      "('6hqu', 'ho', ('I', 'M'))\n",
      "('6hqu', 'ho', ('I', 'N'))\n",
      "('6hqu', 'ho', ('J', 'K'))\n",
      "('6hqu', 'ho', ('J', 'L'))\n",
      "('6hqu', 'ho', ('J', 'M'))\n",
      "('6hqu', 'ho', ('J', 'N'))\n",
      "('6hqu', 'ho', ('K', 'L'))\n",
      "('6hqu', 'ho', ('K', 'M'))\n",
      "('6hqu', 'ho', ('K', 'N'))\n",
      "('6hqu', 'ho', ('L', 'M'))\n",
      "('6hqu', 'ho', ('L', 'N'))\n",
      "('6hqu', 'ho', ('M', 'N'))\n",
      "('6hqu', 'ho?', ('I', 'O'))\n",
      "('6hqu', 'ho?', ('J', 'O'))\n",
      "('6hqu', 'ho?', ('K', 'O'))\n",
      "('6hqu', 'ho?', ('L', 'O'))\n",
      "('6hqu', 'ho?', ('M', 'O'))\n",
      "('6hqu', 'ho?', ('N', 'O'))\n",
      "('1y97', 'ho', ('A', 'B'))\n"
     ]
    }
   ],
   "source": [
    "for i in traverseSIFTS(data):\n",
    "    print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'6gy2': {'P53350': {'1': {'A': {'P53350': {'identity': 0.988,\n",
       "      'sifts_unp_range': '[[365,603]]',\n",
       "      'sifts_pdb_range': '[[7,245]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'}},\n",
       "    'B': {'P53350': {'identity': 0.988,\n",
       "      'sifts_unp_range': '[[365,603]]',\n",
       "      'sifts_pdb_range': '[[7,245]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'}}}},\n",
       "  'P51587': {'2': {'C': {'P51587': {'identity': 1.0,\n",
       "      'sifts_unp_range': '[[194,210]]',\n",
       "      'sifts_pdb_range': '[[1,17]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'}},\n",
       "    'D': {'P51587': {'identity': 1.0,\n",
       "      'sifts_unp_range': '[[194,210]]',\n",
       "      'sifts_pdb_range': '[[1,17]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'}}}}},\n",
       " '6hqu': {'P51587': {'2': {'I': {'P51587': {'identity': 0.893,\n",
       "      'sifts_unp_range': '[[1226,1253]]',\n",
       "      'sifts_pdb_range': '[[1,28]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'}},\n",
       "    'J': {'P51587': {'identity': 0.893,\n",
       "      'sifts_unp_range': '[[1226,1253]]',\n",
       "      'sifts_pdb_range': '[[1,28]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'}},\n",
       "    'K': {'P51587': {'identity': 0.893,\n",
       "      'sifts_unp_range': '[[1226,1253]]',\n",
       "      'sifts_pdb_range': '[[1,28]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'}},\n",
       "    'L': {'P51587': {'identity': 0.893,\n",
       "      'sifts_unp_range': '[[1226,1253]]',\n",
       "      'sifts_pdb_range': '[[1,28]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'}},\n",
       "    'M': {'P51587': {'identity': 0.893,\n",
       "      'sifts_unp_range': '[[1226,1253]]',\n",
       "      'sifts_pdb_range': '[[1,28]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'}},\n",
       "    'N': {'P51587': {'identity': 0.893,\n",
       "      'sifts_unp_range': '[[1226,1253]]',\n",
       "      'sifts_pdb_range': '[[1,28]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'}}},\n",
       "   '3': {'O': {'P51587': {'identity': 0.909,\n",
       "      'sifts_unp_range': '[[2054,2064]]',\n",
       "      'sifts_pdb_range': '[[5,15]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'}}}}},\n",
       " '3eu7': {'Q86YC2': {'1': {'A': {'Q86YC2': {'identity': 1.0,\n",
       "      'sifts_unp_range': '[[835,1186]]',\n",
       "      'sifts_pdb_range': '[[5,356]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'}}}},\n",
       "  'P51587': {'2': {'X': {'P51587': {'identity': 1.0,\n",
       "      'sifts_unp_range': '[[21,39]]',\n",
       "      'sifts_pdb_range': '[[1,19]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'}}}}},\n",
       " '1y97': {'Q9BQ50': {'1': {'A': {'Q9BQ50': {'identity': 1.0,\n",
       "      'sifts_unp_range': '[[1,236]]',\n",
       "      'sifts_pdb_range': '[[3,238]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'},\n",
       "     'Q9BQ50-1': {'identity': 0.996,\n",
       "      'sifts_unp_range': '[[43,279]]',\n",
       "      'sifts_pdb_range': '[[2,238]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'}},\n",
       "    'B': {'Q9BQ50': {'identity': 1.0,\n",
       "      'sifts_unp_range': '[[1,236]]',\n",
       "      'sifts_pdb_range': '[[3,238]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'},\n",
       "     'Q9BQ50-1': {'identity': 0.996,\n",
       "      'sifts_unp_range': '[[43,279]]',\n",
       "      'sifts_pdb_range': '[[2,238]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'}}}}},\n",
       " '1n0w': {'Q06609': {'1': {'A': {'Q06609-3': {'identity': 1.0,\n",
       "      'sifts_unp_range': '[[97,258]]',\n",
       "      'sifts_pdb_range': '[[1,162]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'},\n",
       "     'Q06609-2': {'identity': 0.753,\n",
       "      'sifts_unp_range': '[[19,42],[43,45],[46,56],[57,242]]',\n",
       "      'sifts_pdb_range': '[[7,30],[34,36],[41,51],[60,243]]',\n",
       "      'sifts_range_tage': 'Insertion & Deletion',\n",
       "      'delete': 'False'},\n",
       "     'Q06609': {'identity': 1.0,\n",
       "      'sifts_unp_range': '[[97,339]]',\n",
       "      'sifts_pdb_range': '[[1,243]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'},\n",
       "     'Q06609-4': {'identity': 1.0,\n",
       "      'sifts_unp_range': '[[116,340]]',\n",
       "      'sifts_pdb_range': '[[19,243]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'}}}},\n",
       "  'P51587': {'2': {'B': {'P51587': {'identity': 1.0,\n",
       "      'sifts_unp_range': '[[1517,1551]]',\n",
       "      'sifts_pdb_range': '[[1,35]]',\n",
       "      'sifts_range_tage': 'Safe',\n",
       "      'delete': 'False'}}}}}}"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# pprint(json.dumps(data, indent=4))\n",
    "json.loads(json.dumps(data, indent=4))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pdb_id</th>\n",
       "      <th>Entry</th>\n",
       "      <th>entity_id</th>\n",
       "      <th>chain_id</th>\n",
       "      <th>UniProt</th>\n",
       "      <th>identity</th>\n",
       "      <th>sifts_unp_range</th>\n",
       "      <th>sifts_pdb_range</th>\n",
       "      <th>sifts_range_tage</th>\n",
       "      <th>delete</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>6gy2</td>\n",
       "      <td>P53350</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>P53350</td>\n",
       "      <td>0.988</td>\n",
       "      <td>[[365,603]]</td>\n",
       "      <td>[[7,245]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6gy2</td>\n",
       "      <td>P53350</td>\n",
       "      <td>1</td>\n",
       "      <td>B</td>\n",
       "      <td>P53350</td>\n",
       "      <td>0.988</td>\n",
       "      <td>[[365,603]]</td>\n",
       "      <td>[[7,245]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>6gy2</td>\n",
       "      <td>P51587</td>\n",
       "      <td>2</td>\n",
       "      <td>C</td>\n",
       "      <td>P51587</td>\n",
       "      <td>1.000</td>\n",
       "      <td>[[194,210]]</td>\n",
       "      <td>[[1,17]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6gy2</td>\n",
       "      <td>P51587</td>\n",
       "      <td>2</td>\n",
       "      <td>D</td>\n",
       "      <td>P51587</td>\n",
       "      <td>1.000</td>\n",
       "      <td>[[194,210]]</td>\n",
       "      <td>[[1,17]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6hqu</td>\n",
       "      <td>P51587</td>\n",
       "      <td>2</td>\n",
       "      <td>I</td>\n",
       "      <td>P51587</td>\n",
       "      <td>0.893</td>\n",
       "      <td>[[1226,1253]]</td>\n",
       "      <td>[[1,28]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6hqu</td>\n",
       "      <td>P51587</td>\n",
       "      <td>2</td>\n",
       "      <td>J</td>\n",
       "      <td>P51587</td>\n",
       "      <td>0.893</td>\n",
       "      <td>[[1226,1253]]</td>\n",
       "      <td>[[1,28]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6hqu</td>\n",
       "      <td>P51587</td>\n",
       "      <td>2</td>\n",
       "      <td>K</td>\n",
       "      <td>P51587</td>\n",
       "      <td>0.893</td>\n",
       "      <td>[[1226,1253]]</td>\n",
       "      <td>[[1,28]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>6hqu</td>\n",
       "      <td>P51587</td>\n",
       "      <td>2</td>\n",
       "      <td>L</td>\n",
       "      <td>P51587</td>\n",
       "      <td>0.893</td>\n",
       "      <td>[[1226,1253]]</td>\n",
       "      <td>[[1,28]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>6hqu</td>\n",
       "      <td>P51587</td>\n",
       "      <td>2</td>\n",
       "      <td>M</td>\n",
       "      <td>P51587</td>\n",
       "      <td>0.893</td>\n",
       "      <td>[[1226,1253]]</td>\n",
       "      <td>[[1,28]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>6hqu</td>\n",
       "      <td>P51587</td>\n",
       "      <td>2</td>\n",
       "      <td>N</td>\n",
       "      <td>P51587</td>\n",
       "      <td>0.893</td>\n",
       "      <td>[[1226,1253]]</td>\n",
       "      <td>[[1,28]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>6hqu</td>\n",
       "      <td>P51587</td>\n",
       "      <td>3</td>\n",
       "      <td>O</td>\n",
       "      <td>P51587</td>\n",
       "      <td>0.909</td>\n",
       "      <td>[[2054,2064]]</td>\n",
       "      <td>[[5,15]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>3eu7</td>\n",
       "      <td>Q86YC2</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>Q86YC2</td>\n",
       "      <td>1.000</td>\n",
       "      <td>[[835,1186]]</td>\n",
       "      <td>[[5,356]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>3eu7</td>\n",
       "      <td>P51587</td>\n",
       "      <td>2</td>\n",
       "      <td>X</td>\n",
       "      <td>P51587</td>\n",
       "      <td>1.000</td>\n",
       "      <td>[[21,39]]</td>\n",
       "      <td>[[1,19]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>1y97</td>\n",
       "      <td>Q9BQ50</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>Q9BQ50</td>\n",
       "      <td>1.000</td>\n",
       "      <td>[[1,236]]</td>\n",
       "      <td>[[3,238]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>1y97</td>\n",
       "      <td>Q9BQ50</td>\n",
       "      <td>1</td>\n",
       "      <td>B</td>\n",
       "      <td>Q9BQ50</td>\n",
       "      <td>1.000</td>\n",
       "      <td>[[1,236]]</td>\n",
       "      <td>[[3,238]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>1y97</td>\n",
       "      <td>Q9BQ50</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>Q9BQ50-1</td>\n",
       "      <td>0.996</td>\n",
       "      <td>[[43,279]]</td>\n",
       "      <td>[[2,238]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>1y97</td>\n",
       "      <td>Q9BQ50</td>\n",
       "      <td>1</td>\n",
       "      <td>B</td>\n",
       "      <td>Q9BQ50-1</td>\n",
       "      <td>0.996</td>\n",
       "      <td>[[43,279]]</td>\n",
       "      <td>[[2,238]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>1n0w</td>\n",
       "      <td>Q06609</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>Q06609-3</td>\n",
       "      <td>1.000</td>\n",
       "      <td>[[97,258]]</td>\n",
       "      <td>[[1,162]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>1n0w</td>\n",
       "      <td>Q06609</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>Q06609-2</td>\n",
       "      <td>0.753</td>\n",
       "      <td>[[19,42],[43,45],[46,56],[57,242]]</td>\n",
       "      <td>[[7,30],[34,36],[41,51],[60,243]]</td>\n",
       "      <td>Insertion &amp; Deletion</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>1n0w</td>\n",
       "      <td>Q06609</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>Q06609</td>\n",
       "      <td>1.000</td>\n",
       "      <td>[[97,339]]</td>\n",
       "      <td>[[1,243]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>1n0w</td>\n",
       "      <td>Q06609</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>Q06609-4</td>\n",
       "      <td>1.000</td>\n",
       "      <td>[[116,340]]</td>\n",
       "      <td>[[19,243]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>1n0w</td>\n",
       "      <td>P51587</td>\n",
       "      <td>2</td>\n",
       "      <td>B</td>\n",
       "      <td>P51587</td>\n",
       "      <td>1.000</td>\n",
       "      <td>[[1517,1551]]</td>\n",
       "      <td>[[1,35]]</td>\n",
       "      <td>Safe</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pdb_id   Entry  entity_id chain_id   UniProt  identity  \\\n",
       "0    6gy2  P53350          1        A    P53350     0.988   \n",
       "1    6gy2  P53350          1        B    P53350     0.988   \n",
       "2    6gy2  P51587          2        C    P51587     1.000   \n",
       "3    6gy2  P51587          2        D    P51587     1.000   \n",
       "4    6hqu  P51587          2        I    P51587     0.893   \n",
       "5    6hqu  P51587          2        J    P51587     0.893   \n",
       "6    6hqu  P51587          2        K    P51587     0.893   \n",
       "7    6hqu  P51587          2        L    P51587     0.893   \n",
       "8    6hqu  P51587          2        M    P51587     0.893   \n",
       "9    6hqu  P51587          2        N    P51587     0.893   \n",
       "10   6hqu  P51587          3        O    P51587     0.909   \n",
       "11   3eu7  Q86YC2          1        A    Q86YC2     1.000   \n",
       "12   3eu7  P51587          2        X    P51587     1.000   \n",
       "13   1y97  Q9BQ50          1        A    Q9BQ50     1.000   \n",
       "14   1y97  Q9BQ50          1        B    Q9BQ50     1.000   \n",
       "15   1y97  Q9BQ50          1        A  Q9BQ50-1     0.996   \n",
       "16   1y97  Q9BQ50          1        B  Q9BQ50-1     0.996   \n",
       "17   1n0w  Q06609          1        A  Q06609-3     1.000   \n",
       "18   1n0w  Q06609          1        A  Q06609-2     0.753   \n",
       "19   1n0w  Q06609          1        A    Q06609     1.000   \n",
       "20   1n0w  Q06609          1        A  Q06609-4     1.000   \n",
       "21   1n0w  P51587          2        B    P51587     1.000   \n",
       "\n",
       "                       sifts_unp_range                    sifts_pdb_range  \\\n",
       "0                          [[365,603]]                          [[7,245]]   \n",
       "1                          [[365,603]]                          [[7,245]]   \n",
       "2                          [[194,210]]                           [[1,17]]   \n",
       "3                          [[194,210]]                           [[1,17]]   \n",
       "4                        [[1226,1253]]                           [[1,28]]   \n",
       "5                        [[1226,1253]]                           [[1,28]]   \n",
       "6                        [[1226,1253]]                           [[1,28]]   \n",
       "7                        [[1226,1253]]                           [[1,28]]   \n",
       "8                        [[1226,1253]]                           [[1,28]]   \n",
       "9                        [[1226,1253]]                           [[1,28]]   \n",
       "10                       [[2054,2064]]                           [[5,15]]   \n",
       "11                        [[835,1186]]                          [[5,356]]   \n",
       "12                           [[21,39]]                           [[1,19]]   \n",
       "13                           [[1,236]]                          [[3,238]]   \n",
       "14                           [[1,236]]                          [[3,238]]   \n",
       "15                          [[43,279]]                          [[2,238]]   \n",
       "16                          [[43,279]]                          [[2,238]]   \n",
       "17                          [[97,258]]                          [[1,162]]   \n",
       "18  [[19,42],[43,45],[46,56],[57,242]]  [[7,30],[34,36],[41,51],[60,243]]   \n",
       "19                          [[97,339]]                          [[1,243]]   \n",
       "20                         [[116,340]]                         [[19,243]]   \n",
       "21                       [[1517,1551]]                           [[1,35]]   \n",
       "\n",
       "        sifts_range_tage  delete  \n",
       "0                   Safe   False  \n",
       "1                   Safe   False  \n",
       "2                   Safe   False  \n",
       "3                   Safe   False  \n",
       "4                   Safe   False  \n",
       "5                   Safe   False  \n",
       "6                   Safe   False  \n",
       "7                   Safe   False  \n",
       "8                   Safe   False  \n",
       "9                   Safe   False  \n",
       "10                  Safe   False  \n",
       "11                  Safe   False  \n",
       "12                  Safe   False  \n",
       "13                  Safe   False  \n",
       "14                  Safe   False  \n",
       "15                  Safe   False  \n",
       "16                  Safe   False  \n",
       "17                  Safe   False  \n",
       "18  Insertion & Deletion   False  \n",
       "19                  Safe   False  \n",
       "20                  Safe   False  \n",
       "21                  Safe   False  "
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dfrm[['pdb_id', 'Entry', 'entity_id', 'chain_id' ,'UniProt', 'identity','sifts_unp_range', 'sifts_pdb_range', 'sifts_range_tage', 'delete']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1N0W_A with Q06609-2\n",
    "\n",
    "```clustal\n",
    "CLUSTAL O(1.2.4) multiple sequence alignment\n",
    "\n",
    "\n",
    "1N0W:A|PDBID|CHAIN|SEQUENCE      ------------SEIIQITTGSKELDKLLQGGIETGSITEMFGEFRTGKTQICHTLAVTC\n",
    "sp|Q06609-2|RAD51_HUMAN          MAMQMQLEANADTSVEEESFGPQPISRLEQCGINANDVKKLEE-------AGFHTVEAVA\n",
    "                                             :.: : : * : :.:* * **::..:.::            **: ...\n",
    "\n",
    "1N0W:A|PDBID|CHAIN|SEQUENCE      QLPIDRGGGEGKAMYIDTEGTFRPERLLAVAERYGLSGSDVLDNVAYARAFNTDHQTQLL\n",
    "sp|Q06609-2|RAD51_HUMAN          YA------PKKELINIKGISEAKADKILAVAERYGLSGSDVLDNVAYARAFNTDHQTQLL\n",
    "                                          : : : *.  .  : :::*********************************\n",
    "\n",
    "1N0W:A|PDBID|CHAIN|SEQUENCE      YQASAMMVESRYALLIVDSATALYRTDYSGRGELSARQMHLARFLRMLLRLADEFGVAVV\n",
    "sp|Q06609-2|RAD51_HUMAN          YQASAMMVESRYALLIVDSATALYRTDYSGRGELSARQMHLARFLRMLLRLADEFGVAVV\n",
    "                                 ************************************************************\n",
    "\n",
    "1N0W:A|PDBID|CHAIN|SEQUENCE      ITNQVVAQVDGAAMFAADPKKPIGGNIIAHASTTRLYLRKGRGETRICKIYDSPCLPEAE\n",
    "sp|Q06609-2|RAD51_HUMAN          ITNQVVAQVDGAAMFAADPKKPIGGNIIAHASTTRLYLRKGRGETRICKIYDSPCLPEAE\n",
    "                                 ************************************************************\n",
    "\n",
    "1N0W:A|PDBID|CHAIN|SEQUENCE      AMFAINADGVGDAKD\n",
    "sp|Q06609-2|RAD51_HUMAN          AMFAINADGVGDAKD\n",
    "                                 ***************\n",
    "```\n",
    "\n",
    "### 1N0W_A with Q06609 Entry\n",
    "\n",
    "```clustal\n",
    "CLUSTAL O(1.2.4) multiple sequence alignment\n",
    "\n",
    "\n",
    "1N0W:A|PDBID|CHAIN|SEQUENCE      ------------------------------------------------------------\n",
    "sp|Q06609|RAD51_HUMAN            MAMQMQLEANADTSVEEESFGPQPISRLEQCGINANDVKKLEEAGFHTVEAVAYAPKKEL\n",
    "sp|Q06609-2|RAD51_HUMAN          MAMQMQLEANADTSVEEESFGPQPISRLEQCGINANDVKKLEEAGFHTVEAVAYAPKKEL\n",
    "sp|Q06609-3|RAD51_HUMAN          MAMQMQLEANADTSVEEESFGPQPISRLEQCGINANDVKKLEEAGFHTVEAVAYAPKKEL\n",
    "sp|Q06609-4|RAD51_HUMAN          MAMQMQLEANADTSVEEESFGPQPISRLEQCGINANDVKKLEEAGFHTVEAVAYAPKKEL\n",
    "                                                                                             \n",
    "\n",
    "1N0W:A|PDBID|CHAIN|SEQUENCE      -------------------------------------SEIIQITTGSKELDKLLQGGIET\n",
    "sp|Q06609|RAD51_HUMAN            INIKGISEAKADKILAEAAKLVPMGFTTATEF-HQRRSEIIQITTGSKELDKLLQGGIET\n",
    "sp|Q06609-2|RAD51_HUMAN          INIKGISEAKADKI----------------------------------------------\n",
    "sp|Q06609-3|RAD51_HUMAN          INIKGISEAKADKILAEAAKLVPMGFTTATEF-HQRRSEIIQITTGSKELDKLLQGGIET\n",
    "sp|Q06609-4|RAD51_HUMAN          INIKGISEAKADKILTESRSVARLECNSVILVYCTLRLSGSSDSPASASRVVGTTGGIET\n",
    "                                                                                             \n",
    "\n",
    "1N0W:A|PDBID|CHAIN|SEQUENCE      GSITEMFGEFRTGKTQICHTLAVTCQLPIDRGGGEGKAMYIDTEGTFRPERLLAVAERYG\n",
    "sp|Q06609|RAD51_HUMAN            GSITEMFGEFRTGKTQICHTLAVTCQLPIDRGGGEGKAMYIDTEGTFRPERLLAVAERYG\n",
    "sp|Q06609-2|RAD51_HUMAN          ----------------------------------------------------LAVAERYG\n",
    "sp|Q06609-3|RAD51_HUMAN          GSITEMFGEFRTGKTQICHTLAVTCQLPIDRGGGEGKAMYIDTEGTFRPERLLAVAERYG\n",
    "sp|Q06609-4|RAD51_HUMAN          GSITEMFGEFRTGKTQICHTLAVTCQLPIDRGGGEGKAMYIDTEGTFRPERLLAVAERYG\n",
    "                                                                                     ********\n",
    "\n",
    "1N0W:A|PDBID|CHAIN|SEQUENCE      LSGSDVLDNVAYARAFNTDHQTQLLYQASAMMVESRYALLIVDSATALYRTDYSGRGELS\n",
    "sp|Q06609|RAD51_HUMAN            LSGSDVLDNVAYARAFNTDHQTQLLYQASAMMVESRYALLIVDSATALYRTDYSGRGELS\n",
    "sp|Q06609-2|RAD51_HUMAN          LSGSDVLDNVAYARAFNTDHQTQLLYQASAMMVESRYALLIVDSATALYRTDYSGRGELS\n",
    "sp|Q06609-3|RAD51_HUMAN          LSGSDVLDNVAYARAFNTDHQTQLLYQASAMMVESRYALLIVDSATALYRTDYSGRGELS\n",
    "sp|Q06609-4|RAD51_HUMAN          LSGSDVLDNVAYARAFNTDHQTQLLYQASAMMVESRYALLIVDSATALYRTDYSGRGELS\n",
    "                                 ************************************************************\n",
    "\n",
    "1N0W:A|PDBID|CHAIN|SEQUENCE      ARQMHLARFLRMLLRLADEFGVAVVITNQVVAQVDGAAMFAADPKKPIGGNIIAHASTTR\n",
    "sp|Q06609|RAD51_HUMAN            ARQMHLARFLRMLLRLADEFGVAVVITNQVVAQVDGAAMFAADPKKPIGGNIIAHASTTR\n",
    "sp|Q06609-2|RAD51_HUMAN          ARQMHLARFLRMLLRLADEFGVAVVITNQVVAQVDGAAMFAADPKKPIGGNIIAHASTTR\n",
    "sp|Q06609-3|RAD51_HUMAN          ARQMHLARFLRMLLRLADEIVSEERKRGNQNLQN------------------------LR\n",
    "sp|Q06609-4|RAD51_HUMAN          ARQMHLARFLRMLLRLADEFGVAVVITNQVVAQVDGAAMFAADPKKPIGGNIIAHASTTR\n",
    "                                 *******************:       .:   *                          *\n",
    "\n",
    "1N0W:A|PDBID|CHAIN|SEQUENCE      LYLRKGRGETRICKIYDSPCLPEAEAMFAINADGVGDAKD\n",
    "sp|Q06609|RAD51_HUMAN            LYLRKGRGETRICKIYDSPCLPEAEAMFAINADGVGDAKD\n",
    "sp|Q06609-2|RAD51_HUMAN          LYLRKGRGETRICKIYDSPCLPEAEAMFAINADGVGDAKD\n",
    "sp|Q06609-3|RAD51_HUMAN          LSLSS-----------------------------------\n",
    "sp|Q06609-4|RAD51_HUMAN          LYLRKGRGETRICKIYDSPCLPEAEAMFAINADGVGDAKD\n",
    "                                 * * .                      \n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d1adeb5a0ecb4d7eae516dc7af7a0818",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import nglview"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b08270701345452c8420d0bad6335f38",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "view = nglview.show_pdbid(\"10gs\")\n",
    "view"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "622d992fe2b24dcea8c1271e502bffd5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "view2 = nglview.show_pdbid(\"5dcg\")\n",
    "view2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "# view.add_representation('line', selection='protein')\n",
    "# view.add_representation('ball+stick', selection='ligand')\n",
    "view2.add_representation('surface', selection='protein', opacity=0.1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test for retrieve UniProt FASTA files\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "UniProtFASTA.init_logger()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████████████████████████████████████████████████| 3/3 [00:07<00:00,  2.59s/it]\n",
      "2020-02-19 12:05:09,243 Muta3DMaps.core.log INFO 3 ids downloaded in 7.80s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['C:\\\\GitWorks\\\\Muta3DMaps\\\\Muta3DMaps\\\\test\\\\data\\\\uniprot\\\\Q9BQ50-1.fasta',\n",
       " 'C:\\\\GitWorks\\\\Muta3DMaps\\\\Muta3DMaps\\\\test\\\\data\\\\uniprot\\\\Q9BQ50.fasta',\n",
       " 'C:\\\\GitWorks\\\\Muta3DMaps\\\\Muta3DMaps\\\\test\\\\data\\\\uniprot\\\\P51587.fasta']"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "UniProtFASTA.retrieve(\n",
    "    lyst=['P51587', 'Q9BQ50-1', 'Q9BQ50'],\n",
    "    folder=r'C:\\GitWorks\\Muta3DMaps\\Muta3DMaps\\test\\data\\uniprot'\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test for filtering dfrm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "from typing import Optional, Union, Dict, Tuple, Iterable\n",
    "from pandas import read_csv, DataFrame\n",
    "from pathlib import Path\n",
    "\n",
    "def related_dataframe(filters: Optional[Union[Dict, Iterable[Tuple]]] = None, dfrm: Optional[DataFrame] = None, path: Union[str, Path, None] = None, sep: str = '\\t'):\n",
    "    if dfrm is None:\n",
    "        if path is not None:\n",
    "            dfrm = read_csv(path, sep=sep)\n",
    "        else:\n",
    "            raise ValueError('path should not be None')\n",
    "    elif not isinstance(dfrm, DataFrame):\n",
    "        raise ValueError('dfrm should be a pandas.DataFrame')\n",
    "\n",
    "    if filters is None:\n",
    "        return dfrm\n",
    "    elif isinstance(filters, Dict):\n",
    "        filters = filters.items()\n",
    "\n",
    "    for col, (symbol, value) in filters:\n",
    "        dfrm = dfrm[getattr(getattr(dfrm, col), symbol)(value)]\n",
    "    return dfrm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "isna() takes 1 positional argument but 2 were given",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-27-8cb414928d58>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      6\u001b[0m ]\n\u001b[0;32m      7\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 8\u001b[1;33m \u001b[0mrelated_dataframe\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilters\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mfilters\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdfrm\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdfrm\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m<ipython-input-14-b24f3b304e9a>\u001b[0m in \u001b[0;36mrelated_dataframe\u001b[1;34m(filters, dfrm, path, sep)\u001b[0m\n\u001b[0;32m     18\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     19\u001b[0m     \u001b[1;32mfor\u001b[0m \u001b[0mcol\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0msymbol\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mfilters\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 20\u001b[1;33m         \u001b[0mdfrm\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdfrm\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdfrm\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcol\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msymbol\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     21\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0mdfrm\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mTypeError\u001b[0m: isna() takes 1 positional argument but 2 were given"
     ]
    }
   ],
   "source": [
    "filters = [\n",
    "    ('sifts_range_tage', ('isin', ['Safe', 'Insertion'])),\n",
    "    ('delete', ('ne', True)),\n",
    "    ('identity', ('ge', 0.9))\n",
    "]\n",
    "\n",
    "related_dataframe(filters=filters, dfrm=dfrm)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Test for ProcessEntryData Basic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "ProcessEntryData.init_logger()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.85s/it]\n",
      "2020-02-22 11:56:58,061 ProcessEntryData INFO 1 ids downloaded in 2.86s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>assemblies</th>\n",
       "      <th>deposition_date</th>\n",
       "      <th>deposition_site</th>\n",
       "      <th>entry_authors</th>\n",
       "      <th>experimental_method</th>\n",
       "      <th>experimental_method_class</th>\n",
       "      <th>number_of_entities</th>\n",
       "      <th>processing_site</th>\n",
       "      <th>related_structures</th>\n",
       "      <th>release_date</th>\n",
       "      <th>revision_date</th>\n",
       "      <th>split_entry</th>\n",
       "      <th>title</th>\n",
       "      <th>pdb_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>[{\"preferred\":true,\"form\":\"hetero\",\"name\":\"dim...</td>\n",
       "      <td>20081009</td>\n",
       "      <td>RCSB</td>\n",
       "      <td>[\"Oliver, A.W.\",\"Pearl, L.H.\"]</td>\n",
       "      <td>[\"X-ray diffraction\"]</td>\n",
       "      <td>[\"x-ray\"]</td>\n",
       "      <td>{\"water\":1,\"polypeptide\":2,\"other\":0,\"dna\":0,\"...</td>\n",
       "      <td>PDBJ</td>\n",
       "      <td>[]</td>\n",
       "      <td>20090728</td>\n",
       "      <td>20141112</td>\n",
       "      <td>[]</td>\n",
       "      <td>Crystal Structure of a PALB2 / BRCA2 complex</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>[{\"preferred\":true,\"form\":\"hetero\",\"name\":\"dim...</td>\n",
       "      <td>20180925</td>\n",
       "      <td>PDBE</td>\n",
       "      <td>[\"Pantelejevs, T.\",\"Lindenburg, L.\",\"Hyvonen, ...</td>\n",
       "      <td>[\"X-ray diffraction\"]</td>\n",
       "      <td>[\"x-ray\"]</td>\n",
       "      <td>{\"water\":1,\"polypeptide\":3,\"other\":0,\"dna\":0,\"...</td>\n",
       "      <td>PDBE</td>\n",
       "      <td>[]</td>\n",
       "      <td>20191009</td>\n",
       "      <td>20191009</td>\n",
       "      <td>[]</td>\n",
       "      <td>Humanised RadA mutant HumRadA22 in complex wit...</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>[{\"preferred\":true,\"form\":\"hetero\",\"name\":\"dim...</td>\n",
       "      <td>20180628</td>\n",
       "      <td>PDBE</td>\n",
       "      <td>[\"Miron, S.\",\"Ropars, V.\",\"Zinn-Justin, S.\"]</td>\n",
       "      <td>[\"X-ray diffraction\"]</td>\n",
       "      <td>[\"x-ray\"]</td>\n",
       "      <td>{\"water\":1,\"polypeptide\":2,\"other\":0,\"dna\":0,\"...</td>\n",
       "      <td>PDBE</td>\n",
       "      <td>[]</td>\n",
       "      <td>20190710</td>\n",
       "      <td>20190710</td>\n",
       "      <td>[]</td>\n",
       "      <td>Crystal structure of human Plk1-PBD in complex...</td>\n",
       "      <td>6gy2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>[{\"preferred\":true,\"form\":\"homo\",\"name\":\"dimer...</td>\n",
       "      <td>20041214</td>\n",
       "      <td>RCSB</td>\n",
       "      <td>[\"Perrino, F.W.\",\"Harvey, S.\",\"McMillin, S.\",\"...</td>\n",
       "      <td>[\"X-ray diffraction\"]</td>\n",
       "      <td>[\"x-ray\"]</td>\n",
       "      <td>{\"water\":1,\"polypeptide\":1,\"other\":0,\"dna\":0,\"...</td>\n",
       "      <td>RCSB</td>\n",
       "      <td>[]</td>\n",
       "      <td>20050125</td>\n",
       "      <td>20110713</td>\n",
       "      <td>[]</td>\n",
       "      <td>The human TREX2 3' exonuclease structure sugge...</td>\n",
       "      <td>1y97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>[{\"preferred\":true,\"form\":\"hetero\",\"name\":\"tet...</td>\n",
       "      <td>20021015</td>\n",
       "      <td>RCSB</td>\n",
       "      <td>[\"Pellegrini, L.\",\"Yu, D.S.\",\"Lo, T.\",\"Anand, ...</td>\n",
       "      <td>[\"X-ray diffraction\"]</td>\n",
       "      <td>[\"x-ray\"]</td>\n",
       "      <td>{\"water\":1,\"polypeptide\":4,\"other\":0,\"dna\":0,\"...</td>\n",
       "      <td>RCSB</td>\n",
       "      <td>[]</td>\n",
       "      <td>20021127</td>\n",
       "      <td>20200122</td>\n",
       "      <td>[]</td>\n",
       "      <td>Crystal structure of a RAD51-BRCA2 BRC repeat ...</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                          assemblies  deposition_date  \\\n",
       "0  [{\"preferred\":true,\"form\":\"hetero\",\"name\":\"dim...         20081009   \n",
       "1  [{\"preferred\":true,\"form\":\"hetero\",\"name\":\"dim...         20180925   \n",
       "2  [{\"preferred\":true,\"form\":\"hetero\",\"name\":\"dim...         20180628   \n",
       "3  [{\"preferred\":true,\"form\":\"homo\",\"name\":\"dimer...         20041214   \n",
       "4  [{\"preferred\":true,\"form\":\"hetero\",\"name\":\"tet...         20021015   \n",
       "\n",
       "  deposition_site                                      entry_authors  \\\n",
       "0            RCSB                     [\"Oliver, A.W.\",\"Pearl, L.H.\"]   \n",
       "1            PDBE  [\"Pantelejevs, T.\",\"Lindenburg, L.\",\"Hyvonen, ...   \n",
       "2            PDBE       [\"Miron, S.\",\"Ropars, V.\",\"Zinn-Justin, S.\"]   \n",
       "3            RCSB  [\"Perrino, F.W.\",\"Harvey, S.\",\"McMillin, S.\",\"...   \n",
       "4            RCSB  [\"Pellegrini, L.\",\"Yu, D.S.\",\"Lo, T.\",\"Anand, ...   \n",
       "\n",
       "     experimental_method experimental_method_class  \\\n",
       "0  [\"X-ray diffraction\"]                 [\"x-ray\"]   \n",
       "1  [\"X-ray diffraction\"]                 [\"x-ray\"]   \n",
       "2  [\"X-ray diffraction\"]                 [\"x-ray\"]   \n",
       "3  [\"X-ray diffraction\"]                 [\"x-ray\"]   \n",
       "4  [\"X-ray diffraction\"]                 [\"x-ray\"]   \n",
       "\n",
       "                                  number_of_entities processing_site  \\\n",
       "0  {\"water\":1,\"polypeptide\":2,\"other\":0,\"dna\":0,\"...            PDBJ   \n",
       "1  {\"water\":1,\"polypeptide\":3,\"other\":0,\"dna\":0,\"...            PDBE   \n",
       "2  {\"water\":1,\"polypeptide\":2,\"other\":0,\"dna\":0,\"...            PDBE   \n",
       "3  {\"water\":1,\"polypeptide\":1,\"other\":0,\"dna\":0,\"...            RCSB   \n",
       "4  {\"water\":1,\"polypeptide\":4,\"other\":0,\"dna\":0,\"...            RCSB   \n",
       "\n",
       "  related_structures  release_date  revision_date split_entry  \\\n",
       "0                 []      20090728       20141112          []   \n",
       "1                 []      20191009       20191009          []   \n",
       "2                 []      20190710       20190710          []   \n",
       "3                 []      20050125       20110713          []   \n",
       "4                 []      20021127       20200122          []   \n",
       "\n",
       "                                               title pdb_id  \n",
       "0       Crystal Structure of a PALB2 / BRCA2 complex   3eu7  \n",
       "1  Humanised RadA mutant HumRadA22 in complex wit...   6hqu  \n",
       "2  Crystal structure of human Plk1-PBD in complex...   6gy2  \n",
       "3  The human TREX2 3' exonuclease structure sugge...   1y97  \n",
       "4  Crystal structure of a RAD51-BRCA2 BRC repeat ...   1n0w  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dfrm2 = ProcessEntryData.main(\n",
    "    pdb_col='pdb_id',\n",
    "    filters={'sifts_range_tage': ('eq', 'Safe'), 'delete': ('ne', True)},\n",
    "    dfrm=dfrm,\n",
    "    suffix='pdb/entry/summary/',\n",
    "    method='post',\n",
    "    folder=r'C:\\GitWorks\\Muta3DMaps\\Muta3DMaps\\test\\data'\n",
    ")\n",
    "dfrm2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'dna': 0,\n",
      " 'dna/rna': 0,\n",
      " 'ligand': 1,\n",
      " 'other': 0,\n",
      " 'polypeptide': 2,\n",
      " 'rna': 0,\n",
      " 'sugar': 0,\n",
      " 'water': 1}\n",
      "\n",
      "{'dna': 0,\n",
      " 'dna/rna': 0,\n",
      " 'ligand': 2,\n",
      " 'other': 0,\n",
      " 'polypeptide': 3,\n",
      " 'rna': 0,\n",
      " 'sugar': 0,\n",
      " 'water': 1}\n",
      "\n",
      "{'dna': 0,\n",
      " 'dna/rna': 0,\n",
      " 'ligand': 1,\n",
      " 'other': 0,\n",
      " 'polypeptide': 2,\n",
      " 'rna': 0,\n",
      " 'sugar': 0,\n",
      " 'water': 1}\n",
      "\n",
      "{'dna': 0,\n",
      " 'dna/rna': 0,\n",
      " 'ligand': 0,\n",
      " 'other': 0,\n",
      " 'polypeptide': 1,\n",
      " 'rna': 0,\n",
      " 'sugar': 0,\n",
      " 'water': 1}\n",
      "\n",
      "{'dna': 0,\n",
      " 'dna/rna': 0,\n",
      " 'ligand': 3,\n",
      " 'other': 0,\n",
      " 'polypeptide': 4,\n",
      " 'rna': 0,\n",
      " 'sugar': 0,\n",
      " 'water': 1}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for i in dfrm2.number_of_entities:\n",
    "    pprint(json.loads(i))\n",
    "    print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.52s/it]\n",
      "2020-02-22 11:57:04,161 ProcessEntryData INFO 1 ids downloaded in 1.53s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ca_p_only</th>\n",
       "      <th>entity_id</th>\n",
       "      <th>gene_name</th>\n",
       "      <th>in_chains</th>\n",
       "      <th>in_struct_asyms</th>\n",
       "      <th>length</th>\n",
       "      <th>molecule_name</th>\n",
       "      <th>molecule_type</th>\n",
       "      <th>mutation_flag</th>\n",
       "      <th>number_of_copies</th>\n",
       "      <th>pdb_sequence</th>\n",
       "      <th>pdb_sequence_indices_with_multiple_residues</th>\n",
       "      <th>sample_preparation</th>\n",
       "      <th>sequence</th>\n",
       "      <th>source</th>\n",
       "      <th>synonym</th>\n",
       "      <th>weight</th>\n",
       "      <th>pdb_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>[\"FANCN\",\"PALB2\"]</td>\n",
       "      <td>[\"A\"]</td>\n",
       "      <td>[\"A\"]</td>\n",
       "      <td>356.0</td>\n",
       "      <td>[\"Partner and localizer of BRCA2\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>GPHMSVEQTETAELPASDSINPGNLQLVSELKNPSGSCSVDVSAMF...</td>\n",
       "      <td>{\"297\":{\"three_letter_code\":\"CSD\",\"parent_chem...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>GPHMSVEQTETAELPASDSINPGNLQLVSELKNPSGSCSVDVSAMF...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":5},\"e...</td>\n",
       "      <td>Partner and localizer of BRCA2</td>\n",
       "      <td>39195.793</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>2</td>\n",
       "      <td>[\"BRCA2\",\"FACD\",\"FANCD1\"]</td>\n",
       "      <td>[\"X\"]</td>\n",
       "      <td>[\"B\"]</td>\n",
       "      <td>19.0</td>\n",
       "      <td>[\"Breast cancer type 2 susceptibility protein\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>KADLGPISLNWFEELSSEA</td>\n",
       "      <td>{}</td>\n",
       "      <td>Synthetically obtained</td>\n",
       "      <td>KADLGPISLNWFEELSSEA</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>19meric peptide from Breast cancer type 2 susc...</td>\n",
       "      <td>2107.297</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"A\"]</td>\n",
       "      <td>[\"C\",\"D\"]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"GLYCEROL\"]</td>\n",
       "      <td>Bound</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Synthetically obtained</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>92.094</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"A\",\"X\"]</td>\n",
       "      <td>[\"E\",\"F\"]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"water\"]</td>\n",
       "      <td>Water</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Natural source</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18.015</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"A\",\"B\",\"C\",\"D\",\"E\",\"F\",\"G\",\"H\"]</td>\n",
       "      <td>[\"A\",\"B\",\"C\",\"D\",\"E\",\"F\",\"G\",\"H\"]</td>\n",
       "      <td>231.0</td>\n",
       "      <td>[\"DNA repair and recombination protein RadA\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>DNA repair and recombination protein RadA</td>\n",
       "      <td>25542.064</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>False</td>\n",
       "      <td>2</td>\n",
       "      <td>[\"BRCA2\",\"FACD\",\"FANCD1\"]</td>\n",
       "      <td>[\"I\",\"J\",\"K\",\"L\",\"M\",\"N\"]</td>\n",
       "      <td>[\"I\",\"J\",\"K\",\"L\",\"M\",\"N\"]</td>\n",
       "      <td>38.0</td>\n",
       "      <td>[\"Breast cancer type 2 susceptibility protein\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6</td>\n",
       "      <td>KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>Breast cancer type 2 susceptibility</td>\n",
       "      <td>3980.434</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>False</td>\n",
       "      <td>3</td>\n",
       "      <td>[\"BRCA2\",\"FACD\",\"FANCD1\"]</td>\n",
       "      <td>[\"O\"]</td>\n",
       "      <td>[\"O\"]</td>\n",
       "      <td>15.0</td>\n",
       "      <td>[\"Breast cancer type 2 susceptibility protein\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>KLNVSFSGFSTASGK</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>KLNVSFSGFSTASGK</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":5},\"e...</td>\n",
       "      <td>Breast cancer type 2 susceptibility</td>\n",
       "      <td>1531.710</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>False</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"A\",\"B\",\"C\",\"D\",\"E\",\"F\",\"G\",\"H\"]</td>\n",
       "      <td>[\"BA\",\"DA\",\"P\",\"R\",\"T\",\"V\",\"X\",\"Z\"]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"ADENOSINE-5'-DIPHOSPHATE\"]</td>\n",
       "      <td>Bound</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Synthetically obtained</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>427.201</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>False</td>\n",
       "      <td>5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"A\",\"B\",\"C\",\"D\",\"E\",\"F\",\"G\",\"H\"]</td>\n",
       "      <td>[\"AA\",\"CA\",\"EA\",\"Q\",\"S\",\"U\",\"W\",\"Y\"]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"MAGNESIUM ION\"]</td>\n",
       "      <td>Bound</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Natural source</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>24.305</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>False</td>\n",
       "      <td>6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"A\",\"B\",\"C\",\"D\",\"E\",\"F\",\"G\",\"I\",\"J\",\"K\",\"L\"]</td>\n",
       "      <td>[\"FA\",\"GA\",\"HA\",\"IA\",\"JA\",\"KA\",\"LA\",\"MA\",\"NA\",...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"water\"]</td>\n",
       "      <td>Water</td>\n",
       "      <td>NaN</td>\n",
       "      <td>214</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Natural source</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18.015</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>[\"PLK\",\"PLK1\"]</td>\n",
       "      <td>[\"A\",\"B\"]</td>\n",
       "      <td>[\"A\",\"B\"]</td>\n",
       "      <td>245.0</td>\n",
       "      <td>[\"Serine\\/threonine-protein kinase PLK1\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":7},\"e...</td>\n",
       "      <td>Serine/threonine-protein kinase PLK1</td>\n",
       "      <td>28193.098</td>\n",
       "      <td>6gy2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>False</td>\n",
       "      <td>2</td>\n",
       "      <td>[\"BRCA2\",\"FACD\",\"FANCD1\"]</td>\n",
       "      <td>[\"C\",\"D\"]</td>\n",
       "      <td>[\"C\",\"D\"]</td>\n",
       "      <td>17.0</td>\n",
       "      <td>[\"Breast cancer type 2 susceptibility protein\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>WSSSLATPPTLSS(TPO)VLI</td>\n",
       "      <td>{\"14\":{\"three_letter_code\":\"TPO\",\"parent_chem_...</td>\n",
       "      <td>Synthetically obtained</td>\n",
       "      <td>WSSSLATPPTLSSTVLI</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>Phosphopeptide of BRCA2</td>\n",
       "      <td>1839.975</td>\n",
       "      <td>6gy2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>False</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"A\"]</td>\n",
       "      <td>[\"E\"]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"GLYCEROL\"]</td>\n",
       "      <td>Bound</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Synthetically obtained</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>92.094</td>\n",
       "      <td>6gy2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>False</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"A\",\"B\",\"C\",\"D\"]</td>\n",
       "      <td>[\"F\",\"G\",\"H\",\"I\"]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"water\"]</td>\n",
       "      <td>Water</td>\n",
       "      <td>NaN</td>\n",
       "      <td>22</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Natural source</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18.015</td>\n",
       "      <td>6gy2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>[\"RAD51\",\"RAD51A\",\"RECA\"]</td>\n",
       "      <td>[\"A\"]</td>\n",
       "      <td>[\"A\"]</td>\n",
       "      <td>243.0</td>\n",
       "      <td>[\"DNA repair protein RAD51 homolog 1\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>SEIIQITTGSKELDKLLQGGIETGSITE(MSE)FGEFRTGKTQICH...</td>\n",
       "      <td>{\"147\":{\"three_letter_code\":\"MSE\",\"parent_chem...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>SEIIQITTGSKELDKLLQGGIETGSITEMFGEFRTGKTQICHTLAV...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>DNA repair protein RAD51 homolog 1</td>\n",
       "      <td>26855.346</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>False</td>\n",
       "      <td>2</td>\n",
       "      <td>[\"BRCA2\",\"FACD\",\"FANCD1\"]</td>\n",
       "      <td>[\"B\"]</td>\n",
       "      <td>[\"B\"]</td>\n",
       "      <td>35.0</td>\n",
       "      <td>[\"Breast cancer type 2 susceptibility protein\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>KEPTLLGFHTASGKKVKIAKESLDKVKNLFDEKEQ</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>KEPTLLGFHTASGKKVKIAKESLDKVKNLFDEKEQ</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>Breast cancer type 2 susceptibility protein</td>\n",
       "      <td>3966.598</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>False</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"L\"]</td>\n",
       "      <td>[\"C\"]</td>\n",
       "      <td>14.0</td>\n",
       "      <td>[\"peptide linker\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>TGSTGSTGSTGS(MSE)G</td>\n",
       "      <td>{\"13\":{\"three_letter_code\":\"MSE\",\"parent_chem_...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>TGSTGSTGSTGSMG</td>\n",
       "      <td>[]</td>\n",
       "      <td>peptide linker</td>\n",
       "      <td>1234.091</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>False</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"C\"]</td>\n",
       "      <td>[\"D\"]</td>\n",
       "      <td>4.0</td>\n",
       "      <td>[\"ARTIFICIAL GLY-SER-MSE-GLY PEPTIDE\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>GS(MSE)G</td>\n",
       "      <td>{\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>GSMG</td>\n",
       "      <td>[]</td>\n",
       "      <td>ARTIFICIAL GLY-SER-MSE-GLY PEPTIDE</td>\n",
       "      <td>397.287</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>False</td>\n",
       "      <td>5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"A\"]</td>\n",
       "      <td>[\"E\"]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"MAGNESIUM ION\"]</td>\n",
       "      <td>Bound</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Synthetically obtained</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>24.305</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>False</td>\n",
       "      <td>6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"A\"]</td>\n",
       "      <td>[\"F\"]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"CHLORIDE ION\"]</td>\n",
       "      <td>Bound</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Synthetically obtained</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>35.453</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>False</td>\n",
       "      <td>7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"A\"]</td>\n",
       "      <td>[\"G\",\"H\",\"I\",\"J\",\"K\"]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"1,2-ETHANEDIOL\"]</td>\n",
       "      <td>Bound</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Synthetically obtained</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>62.068</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>False</td>\n",
       "      <td>8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"A\",\"B\",\"C\",\"L\"]</td>\n",
       "      <td>[\"L\",\"M\",\"N\",\"O\"]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"water\"]</td>\n",
       "      <td>Water</td>\n",
       "      <td>NaN</td>\n",
       "      <td>247</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Natural source</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18.015</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>[\"TREX2\"]</td>\n",
       "      <td>[\"A\",\"B\"]</td>\n",
       "      <td>[\"A\",\"B\"]</td>\n",
       "      <td>238.0</td>\n",
       "      <td>[\"Three prime repair exonuclease 2\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>AG(MSE)SEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSL...</td>\n",
       "      <td>{\"66\":{\"three_letter_code\":\"MSE\",\"parent_chem_...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>AGMSEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSLENPE...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":3},\"e...</td>\n",
       "      <td>Three prime repair exonuclease 2</td>\n",
       "      <td>26224.346</td>\n",
       "      <td>1y97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>False</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"A\",\"B\"]</td>\n",
       "      <td>[\"C\",\"D\"]</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"water\"]</td>\n",
       "      <td>Water</td>\n",
       "      <td>NaN</td>\n",
       "      <td>77</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Natural source</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18.015</td>\n",
       "      <td>1y97</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    ca_p_only  entity_id                  gene_name  \\\n",
       "0       False          1          [\"FANCN\",\"PALB2\"]   \n",
       "1       False          2  [\"BRCA2\",\"FACD\",\"FANCD1\"]   \n",
       "2       False          3                        NaN   \n",
       "3       False          4                        NaN   \n",
       "4       False          1                        NaN   \n",
       "5       False          2  [\"BRCA2\",\"FACD\",\"FANCD1\"]   \n",
       "6       False          3  [\"BRCA2\",\"FACD\",\"FANCD1\"]   \n",
       "7       False          4                        NaN   \n",
       "8       False          5                        NaN   \n",
       "9       False          6                        NaN   \n",
       "10      False          1             [\"PLK\",\"PLK1\"]   \n",
       "11      False          2  [\"BRCA2\",\"FACD\",\"FANCD1\"]   \n",
       "12      False          3                        NaN   \n",
       "13      False          4                        NaN   \n",
       "14      False          1  [\"RAD51\",\"RAD51A\",\"RECA\"]   \n",
       "15      False          2  [\"BRCA2\",\"FACD\",\"FANCD1\"]   \n",
       "16      False          3                        NaN   \n",
       "17      False          4                        NaN   \n",
       "18      False          5                        NaN   \n",
       "19      False          6                        NaN   \n",
       "20      False          7                        NaN   \n",
       "21      False          8                        NaN   \n",
       "22      False          1                  [\"TREX2\"]   \n",
       "23      False          2                        NaN   \n",
       "\n",
       "                                        in_chains  \\\n",
       "0                                           [\"A\"]   \n",
       "1                                           [\"X\"]   \n",
       "2                                           [\"A\"]   \n",
       "3                                       [\"A\",\"X\"]   \n",
       "4               [\"A\",\"B\",\"C\",\"D\",\"E\",\"F\",\"G\",\"H\"]   \n",
       "5                       [\"I\",\"J\",\"K\",\"L\",\"M\",\"N\"]   \n",
       "6                                           [\"O\"]   \n",
       "7               [\"A\",\"B\",\"C\",\"D\",\"E\",\"F\",\"G\",\"H\"]   \n",
       "8               [\"A\",\"B\",\"C\",\"D\",\"E\",\"F\",\"G\",\"H\"]   \n",
       "9   [\"A\",\"B\",\"C\",\"D\",\"E\",\"F\",\"G\",\"I\",\"J\",\"K\",\"L\"]   \n",
       "10                                      [\"A\",\"B\"]   \n",
       "11                                      [\"C\",\"D\"]   \n",
       "12                                          [\"A\"]   \n",
       "13                              [\"A\",\"B\",\"C\",\"D\"]   \n",
       "14                                          [\"A\"]   \n",
       "15                                          [\"B\"]   \n",
       "16                                          [\"L\"]   \n",
       "17                                          [\"C\"]   \n",
       "18                                          [\"A\"]   \n",
       "19                                          [\"A\"]   \n",
       "20                                          [\"A\"]   \n",
       "21                              [\"A\",\"B\",\"C\",\"L\"]   \n",
       "22                                      [\"A\",\"B\"]   \n",
       "23                                      [\"A\",\"B\"]   \n",
       "\n",
       "                                      in_struct_asyms  length  \\\n",
       "0                                               [\"A\"]   356.0   \n",
       "1                                               [\"B\"]    19.0   \n",
       "2                                           [\"C\",\"D\"]     NaN   \n",
       "3                                           [\"E\",\"F\"]     NaN   \n",
       "4                   [\"A\",\"B\",\"C\",\"D\",\"E\",\"F\",\"G\",\"H\"]   231.0   \n",
       "5                           [\"I\",\"J\",\"K\",\"L\",\"M\",\"N\"]    38.0   \n",
       "6                                               [\"O\"]    15.0   \n",
       "7                 [\"BA\",\"DA\",\"P\",\"R\",\"T\",\"V\",\"X\",\"Z\"]     NaN   \n",
       "8                [\"AA\",\"CA\",\"EA\",\"Q\",\"S\",\"U\",\"W\",\"Y\"]     NaN   \n",
       "9   [\"FA\",\"GA\",\"HA\",\"IA\",\"JA\",\"KA\",\"LA\",\"MA\",\"NA\",...     NaN   \n",
       "10                                          [\"A\",\"B\"]   245.0   \n",
       "11                                          [\"C\",\"D\"]    17.0   \n",
       "12                                              [\"E\"]     NaN   \n",
       "13                                  [\"F\",\"G\",\"H\",\"I\"]     NaN   \n",
       "14                                              [\"A\"]   243.0   \n",
       "15                                              [\"B\"]    35.0   \n",
       "16                                              [\"C\"]    14.0   \n",
       "17                                              [\"D\"]     4.0   \n",
       "18                                              [\"E\"]     NaN   \n",
       "19                                              [\"F\"]     NaN   \n",
       "20                              [\"G\",\"H\",\"I\",\"J\",\"K\"]     NaN   \n",
       "21                                  [\"L\",\"M\",\"N\",\"O\"]     NaN   \n",
       "22                                          [\"A\",\"B\"]   238.0   \n",
       "23                                          [\"C\",\"D\"]     NaN   \n",
       "\n",
       "                                      molecule_name   molecule_type  \\\n",
       "0                [\"Partner and localizer of BRCA2\"]  polypeptide(L)   \n",
       "1   [\"Breast cancer type 2 susceptibility protein\"]  polypeptide(L)   \n",
       "2                                      [\"GLYCEROL\"]           Bound   \n",
       "3                                         [\"water\"]           Water   \n",
       "4     [\"DNA repair and recombination protein RadA\"]  polypeptide(L)   \n",
       "5   [\"Breast cancer type 2 susceptibility protein\"]  polypeptide(L)   \n",
       "6   [\"Breast cancer type 2 susceptibility protein\"]  polypeptide(L)   \n",
       "7                      [\"ADENOSINE-5'-DIPHOSPHATE\"]           Bound   \n",
       "8                                 [\"MAGNESIUM ION\"]           Bound   \n",
       "9                                         [\"water\"]           Water   \n",
       "10        [\"Serine\\/threonine-protein kinase PLK1\"]  polypeptide(L)   \n",
       "11  [\"Breast cancer type 2 susceptibility protein\"]  polypeptide(L)   \n",
       "12                                     [\"GLYCEROL\"]           Bound   \n",
       "13                                        [\"water\"]           Water   \n",
       "14           [\"DNA repair protein RAD51 homolog 1\"]  polypeptide(L)   \n",
       "15  [\"Breast cancer type 2 susceptibility protein\"]  polypeptide(L)   \n",
       "16                               [\"peptide linker\"]  polypeptide(L)   \n",
       "17           [\"ARTIFICIAL GLY-SER-MSE-GLY PEPTIDE\"]  polypeptide(L)   \n",
       "18                                [\"MAGNESIUM ION\"]           Bound   \n",
       "19                                 [\"CHLORIDE ION\"]           Bound   \n",
       "20                               [\"1,2-ETHANEDIOL\"]           Bound   \n",
       "21                                        [\"water\"]           Water   \n",
       "22             [\"Three prime repair exonuclease 2\"]  polypeptide(L)   \n",
       "23                                        [\"water\"]           Water   \n",
       "\n",
       "    mutation_flag  number_of_copies  \\\n",
       "0             NaN                 1   \n",
       "1             NaN                 1   \n",
       "2             NaN                 2   \n",
       "3             NaN                98   \n",
       "4             NaN                 8   \n",
       "5             NaN                 6   \n",
       "6             NaN                 1   \n",
       "7             NaN                 8   \n",
       "8             NaN                 8   \n",
       "9             NaN               214   \n",
       "10            NaN                 2   \n",
       "11            NaN                 2   \n",
       "12            NaN                 1   \n",
       "13            NaN                22   \n",
       "14            NaN                 1   \n",
       "15            NaN                 1   \n",
       "16            NaN                 1   \n",
       "17            NaN                 1   \n",
       "18            NaN                 1   \n",
       "19            NaN                 1   \n",
       "20            NaN                 5   \n",
       "21            NaN               247   \n",
       "22            NaN                 2   \n",
       "23            NaN                77   \n",
       "\n",
       "                                         pdb_sequence  \\\n",
       "0   GPHMSVEQTETAELPASDSINPGNLQLVSELKNPSGSCSVDVSAMF...   \n",
       "1                                 KADLGPISLNWFEELSSEA   \n",
       "2                                                 NaN   \n",
       "3                                                 NaN   \n",
       "4   MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...   \n",
       "5              KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK   \n",
       "6                                     KLNVSFSGFSTASGK   \n",
       "7                                                 NaN   \n",
       "8                                                 NaN   \n",
       "9                                                 NaN   \n",
       "10  AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...   \n",
       "11                              WSSSLATPPTLSS(TPO)VLI   \n",
       "12                                                NaN   \n",
       "13                                                NaN   \n",
       "14  SEIIQITTGSKELDKLLQGGIETGSITE(MSE)FGEFRTGKTQICH...   \n",
       "15                KEPTLLGFHTASGKKVKIAKESLDKVKNLFDEKEQ   \n",
       "16                                 TGSTGSTGSTGS(MSE)G   \n",
       "17                                           GS(MSE)G   \n",
       "18                                                NaN   \n",
       "19                                                NaN   \n",
       "20                                                NaN   \n",
       "21                                                NaN   \n",
       "22  AG(MSE)SEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSL...   \n",
       "23                                                NaN   \n",
       "\n",
       "          pdb_sequence_indices_with_multiple_residues  \\\n",
       "0   {\"297\":{\"three_letter_code\":\"CSD\",\"parent_chem...   \n",
       "1                                                  {}   \n",
       "2                                                 NaN   \n",
       "3                                                 NaN   \n",
       "4                                                  {}   \n",
       "5                                                  {}   \n",
       "6                                                  {}   \n",
       "7                                                 NaN   \n",
       "8                                                 NaN   \n",
       "9                                                 NaN   \n",
       "10                                                 {}   \n",
       "11  {\"14\":{\"three_letter_code\":\"TPO\",\"parent_chem_...   \n",
       "12                                                NaN   \n",
       "13                                                NaN   \n",
       "14  {\"147\":{\"three_letter_code\":\"MSE\",\"parent_chem...   \n",
       "15                                                 {}   \n",
       "16  {\"13\":{\"three_letter_code\":\"MSE\",\"parent_chem_...   \n",
       "17  {\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...   \n",
       "18                                                NaN   \n",
       "19                                                NaN   \n",
       "20                                                NaN   \n",
       "21                                                NaN   \n",
       "22  {\"66\":{\"three_letter_code\":\"MSE\",\"parent_chem_...   \n",
       "23                                                NaN   \n",
       "\n",
       "         sample_preparation  \\\n",
       "0   Genetically manipulated   \n",
       "1    Synthetically obtained   \n",
       "2    Synthetically obtained   \n",
       "3            Natural source   \n",
       "4   Genetically manipulated   \n",
       "5   Genetically manipulated   \n",
       "6   Genetically manipulated   \n",
       "7    Synthetically obtained   \n",
       "8            Natural source   \n",
       "9            Natural source   \n",
       "10  Genetically manipulated   \n",
       "11   Synthetically obtained   \n",
       "12   Synthetically obtained   \n",
       "13           Natural source   \n",
       "14  Genetically manipulated   \n",
       "15  Genetically manipulated   \n",
       "16  Genetically manipulated   \n",
       "17  Genetically manipulated   \n",
       "18   Synthetically obtained   \n",
       "19   Synthetically obtained   \n",
       "20   Synthetically obtained   \n",
       "21           Natural source   \n",
       "22  Genetically manipulated   \n",
       "23           Natural source   \n",
       "\n",
       "                                             sequence  \\\n",
       "0   GPHMSVEQTETAELPASDSINPGNLQLVSELKNPSGSCSVDVSAMF...   \n",
       "1                                 KADLGPISLNWFEELSSEA   \n",
       "2                                                 NaN   \n",
       "3                                                 NaN   \n",
       "4   MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...   \n",
       "5              KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK   \n",
       "6                                     KLNVSFSGFSTASGK   \n",
       "7                                                 NaN   \n",
       "8                                                 NaN   \n",
       "9                                                 NaN   \n",
       "10  AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...   \n",
       "11                                  WSSSLATPPTLSSTVLI   \n",
       "12                                                NaN   \n",
       "13                                                NaN   \n",
       "14  SEIIQITTGSKELDKLLQGGIETGSITEMFGEFRTGKTQICHTLAV...   \n",
       "15                KEPTLLGFHTASGKKVKIAKESLDKVKNLFDEKEQ   \n",
       "16                                     TGSTGSTGSTGSMG   \n",
       "17                                               GSMG   \n",
       "18                                                NaN   \n",
       "19                                                NaN   \n",
       "20                                                NaN   \n",
       "21                                                NaN   \n",
       "22  AGMSEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSLENPE...   \n",
       "23                                                NaN   \n",
       "\n",
       "                                               source  \\\n",
       "0   [{\"mappings\":[{\"start\":{\"residue_number\":5},\"e...   \n",
       "1   [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "2                                                 NaN   \n",
       "3                                                 NaN   \n",
       "4   [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "5   [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "6   [{\"mappings\":[{\"start\":{\"residue_number\":5},\"e...   \n",
       "7                                                 NaN   \n",
       "8                                                 NaN   \n",
       "9                                                 NaN   \n",
       "10  [{\"mappings\":[{\"start\":{\"residue_number\":7},\"e...   \n",
       "11  [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "12                                                NaN   \n",
       "13                                                NaN   \n",
       "14  [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "15  [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "16                                                 []   \n",
       "17                                                 []   \n",
       "18                                                NaN   \n",
       "19                                                NaN   \n",
       "20                                                NaN   \n",
       "21                                                NaN   \n",
       "22  [{\"mappings\":[{\"start\":{\"residue_number\":3},\"e...   \n",
       "23                                                NaN   \n",
       "\n",
       "                                              synonym     weight pdb_id  \n",
       "0                      Partner and localizer of BRCA2  39195.793   3eu7  \n",
       "1   19meric peptide from Breast cancer type 2 susc...   2107.297   3eu7  \n",
       "2                                                 NaN     92.094   3eu7  \n",
       "3                                                 NaN     18.015   3eu7  \n",
       "4           DNA repair and recombination protein RadA  25542.064   6hqu  \n",
       "5                 Breast cancer type 2 susceptibility   3980.434   6hqu  \n",
       "6                 Breast cancer type 2 susceptibility   1531.710   6hqu  \n",
       "7                                                 NaN    427.201   6hqu  \n",
       "8                                                 NaN     24.305   6hqu  \n",
       "9                                                 NaN     18.015   6hqu  \n",
       "10               Serine/threonine-protein kinase PLK1  28193.098   6gy2  \n",
       "11                            Phosphopeptide of BRCA2   1839.975   6gy2  \n",
       "12                                                NaN     92.094   6gy2  \n",
       "13                                                NaN     18.015   6gy2  \n",
       "14                 DNA repair protein RAD51 homolog 1  26855.346   1n0w  \n",
       "15        Breast cancer type 2 susceptibility protein   3966.598   1n0w  \n",
       "16                                     peptide linker   1234.091   1n0w  \n",
       "17                 ARTIFICIAL GLY-SER-MSE-GLY PEPTIDE    397.287   1n0w  \n",
       "18                                                NaN     24.305   1n0w  \n",
       "19                                                NaN     35.453   1n0w  \n",
       "20                                                NaN     62.068   1n0w  \n",
       "21                                                NaN     18.015   1n0w  \n",
       "22                   Three prime repair exonuclease 2  26224.346   1y97  \n",
       "23                                                NaN     18.015   1y97  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Retrieve the molecule (entity) records served under the\n",
    "# 'pdb/entry/molecules/' suffix for the PDB ids in dfrm['pdb_id'].\n",
    "# Presumably only rows of `dfrm` passing both filters are queried\n",
    "# (tag equals 'Safe', delete is not True) -- confirm in ProcessEntryData.main.\n",
    "# NOTE(review): the key 'sifts_range_tage' looks like a typo of\n",
    "# 'sifts_range_tag'; verify against the actual column names of `dfrm`.\n",
    "molecule_filters = {\n",
    "    'sifts_range_tage': ('eq', 'Safe'),\n",
    "    'delete': ('ne', True),\n",
    "}\n",
    "dfrm3 = ProcessEntryData.main(\n",
    "    pdb_col='pdb_id', filters=molecule_filters, dfrm=dfrm,\n",
    "    suffix='pdb/entry/molecules/', method='post',\n",
    "    folder=r'C:\\GitWorks\\Muta3DMaps\\Muta3DMaps\\test\\data'\n",
    ")\n",
    "dfrm3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Get Protein Entity\n",
    "\n",
    "```py\n",
    "pro_dfrm = dfrm3[dfrm3.molecule_type.isin(['polypeptide(L)', 'polypeptide(D)'])][['pdb_id', 'entity_id']]\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Boolean mask selecting protein entities (L- or D-polypeptides);\n",
    "# computed once and reused for both frames below.\n",
    "is_protein = dfrm3.molecule_type.isin(['polypeptide(L)', 'polypeptide(D)'])\n",
    "# (pdb_id, entity_id) keys of the protein entities; dfrm3's index is kept.\n",
    "pro_dfrm = dfrm3.loc[is_protein, ['pdb_id', 'entity_id']]\n",
    "# Every column of the protein entities, with the index renumbered from 0.\n",
    "pro_full_dfrm = dfrm3.loc[is_protein].reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ca_p_only</th>\n",
       "      <th>entity_id</th>\n",
       "      <th>gene_name</th>\n",
       "      <th>in_chains</th>\n",
       "      <th>in_struct_asyms</th>\n",
       "      <th>length</th>\n",
       "      <th>molecule_name</th>\n",
       "      <th>molecule_type</th>\n",
       "      <th>mutation_flag</th>\n",
       "      <th>number_of_copies</th>\n",
       "      <th>pdb_sequence</th>\n",
       "      <th>pdb_sequence_indices_with_multiple_residues</th>\n",
       "      <th>sample_preparation</th>\n",
       "      <th>sequence</th>\n",
       "      <th>source</th>\n",
       "      <th>synonym</th>\n",
       "      <th>weight</th>\n",
       "      <th>pdb_id</th>\n",
       "      <th>protein_entities_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>[\"FANCN\",\"PALB2\"]</td>\n",
       "      <td>[\"A\"]</td>\n",
       "      <td>[\"A\"]</td>\n",
       "      <td>356.0</td>\n",
       "      <td>[\"Partner and localizer of BRCA2\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>GPHMSVEQTETAELPASDSINPGNLQLVSELKNPSGSCSVDVSAMF...</td>\n",
       "      <td>{\"297\":{\"three_letter_code\":\"CSD\",\"parent_chem...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>GPHMSVEQTETAELPASDSINPGNLQLVSELKNPSGSCSVDVSAMF...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":5},\"e...</td>\n",
       "      <td>Partner and localizer of BRCA2</td>\n",
       "      <td>39195.793</td>\n",
       "      <td>3eu7</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>2</td>\n",
       "      <td>[\"BRCA2\",\"FACD\",\"FANCD1\"]</td>\n",
       "      <td>[\"X\"]</td>\n",
       "      <td>[\"B\"]</td>\n",
       "      <td>19.0</td>\n",
       "      <td>[\"Breast cancer type 2 susceptibility protein\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>KADLGPISLNWFEELSSEA</td>\n",
       "      <td>{}</td>\n",
       "      <td>Synthetically obtained</td>\n",
       "      <td>KADLGPISLNWFEELSSEA</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>19meric peptide from Breast cancer type 2 susc...</td>\n",
       "      <td>2107.297</td>\n",
       "      <td>3eu7</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"A\",\"B\",\"C\",\"D\",\"E\",\"F\",\"G\",\"H\"]</td>\n",
       "      <td>[\"A\",\"B\",\"C\",\"D\",\"E\",\"F\",\"G\",\"H\"]</td>\n",
       "      <td>231.0</td>\n",
       "      <td>[\"DNA repair and recombination protein RadA\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>DNA repair and recombination protein RadA</td>\n",
       "      <td>25542.064</td>\n",
       "      <td>6hqu</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>2</td>\n",
       "      <td>[\"BRCA2\",\"FACD\",\"FANCD1\"]</td>\n",
       "      <td>[\"I\",\"J\",\"K\",\"L\",\"M\",\"N\"]</td>\n",
       "      <td>[\"I\",\"J\",\"K\",\"L\",\"M\",\"N\"]</td>\n",
       "      <td>38.0</td>\n",
       "      <td>[\"Breast cancer type 2 susceptibility protein\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6</td>\n",
       "      <td>KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>Breast cancer type 2 susceptibility</td>\n",
       "      <td>3980.434</td>\n",
       "      <td>6hqu</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>False</td>\n",
       "      <td>3</td>\n",
       "      <td>[\"BRCA2\",\"FACD\",\"FANCD1\"]</td>\n",
       "      <td>[\"O\"]</td>\n",
       "      <td>[\"O\"]</td>\n",
       "      <td>15.0</td>\n",
       "      <td>[\"Breast cancer type 2 susceptibility protein\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>KLNVSFSGFSTASGK</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>KLNVSFSGFSTASGK</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":5},\"e...</td>\n",
       "      <td>Breast cancer type 2 susceptibility</td>\n",
       "      <td>1531.710</td>\n",
       "      <td>6hqu</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>[\"PLK\",\"PLK1\"]</td>\n",
       "      <td>[\"A\",\"B\"]</td>\n",
       "      <td>[\"A\",\"B\"]</td>\n",
       "      <td>245.0</td>\n",
       "      <td>[\"Serine\\/threonine-protein kinase PLK1\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":7},\"e...</td>\n",
       "      <td>Serine/threonine-protein kinase PLK1</td>\n",
       "      <td>28193.098</td>\n",
       "      <td>6gy2</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>False</td>\n",
       "      <td>2</td>\n",
       "      <td>[\"BRCA2\",\"FACD\",\"FANCD1\"]</td>\n",
       "      <td>[\"C\",\"D\"]</td>\n",
       "      <td>[\"C\",\"D\"]</td>\n",
       "      <td>17.0</td>\n",
       "      <td>[\"Breast cancer type 2 susceptibility protein\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>WSSSLATPPTLSS(TPO)VLI</td>\n",
       "      <td>{\"14\":{\"three_letter_code\":\"TPO\",\"parent_chem_...</td>\n",
       "      <td>Synthetically obtained</td>\n",
       "      <td>WSSSLATPPTLSSTVLI</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>Phosphopeptide of BRCA2</td>\n",
       "      <td>1839.975</td>\n",
       "      <td>6gy2</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>[\"RAD51\",\"RAD51A\",\"RECA\"]</td>\n",
       "      <td>[\"A\"]</td>\n",
       "      <td>[\"A\"]</td>\n",
       "      <td>243.0</td>\n",
       "      <td>[\"DNA repair protein RAD51 homolog 1\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>SEIIQITTGSKELDKLLQGGIETGSITE(MSE)FGEFRTGKTQICH...</td>\n",
       "      <td>{\"147\":{\"three_letter_code\":\"MSE\",\"parent_chem...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>SEIIQITTGSKELDKLLQGGIETGSITEMFGEFRTGKTQICHTLAV...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>DNA repair protein RAD51 homolog 1</td>\n",
       "      <td>26855.346</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>False</td>\n",
       "      <td>2</td>\n",
       "      <td>[\"BRCA2\",\"FACD\",\"FANCD1\"]</td>\n",
       "      <td>[\"B\"]</td>\n",
       "      <td>[\"B\"]</td>\n",
       "      <td>35.0</td>\n",
       "      <td>[\"Breast cancer type 2 susceptibility protein\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>KEPTLLGFHTASGKKVKIAKESLDKVKNLFDEKEQ</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>KEPTLLGFHTASGKKVKIAKESLDKVKNLFDEKEQ</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>Breast cancer type 2 susceptibility protein</td>\n",
       "      <td>3966.598</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>False</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"L\"]</td>\n",
       "      <td>[\"C\"]</td>\n",
       "      <td>14.0</td>\n",
       "      <td>[\"peptide linker\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>TGSTGSTGSTGS(MSE)G</td>\n",
       "      <td>{\"13\":{\"three_letter_code\":\"MSE\",\"parent_chem_...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>TGSTGSTGSTGSMG</td>\n",
       "      <td>[]</td>\n",
       "      <td>peptide linker</td>\n",
       "      <td>1234.091</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>False</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"C\"]</td>\n",
       "      <td>[\"D\"]</td>\n",
       "      <td>4.0</td>\n",
       "      <td>[\"ARTIFICIAL GLY-SER-MSE-GLY PEPTIDE\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>GS(MSE)G</td>\n",
       "      <td>{\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>GSMG</td>\n",
       "      <td>[]</td>\n",
       "      <td>ARTIFICIAL GLY-SER-MSE-GLY PEPTIDE</td>\n",
       "      <td>397.287</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>[\"TREX2\"]</td>\n",
       "      <td>[\"A\",\"B\"]</td>\n",
       "      <td>[\"A\",\"B\"]</td>\n",
       "      <td>238.0</td>\n",
       "      <td>[\"Three prime repair exonuclease 2\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>AG(MSE)SEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSL...</td>\n",
       "      <td>{\"66\":{\"three_letter_code\":\"MSE\",\"parent_chem_...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>AGMSEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSLENPE...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":3},\"e...</td>\n",
       "      <td>Three prime repair exonuclease 2</td>\n",
       "      <td>26224.346</td>\n",
       "      <td>1y97</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    ca_p_only  entity_id                  gene_name  \\\n",
       "0       False          1          [\"FANCN\",\"PALB2\"]   \n",
       "1       False          2  [\"BRCA2\",\"FACD\",\"FANCD1\"]   \n",
       "2       False          1                        NaN   \n",
       "3       False          2  [\"BRCA2\",\"FACD\",\"FANCD1\"]   \n",
       "4       False          3  [\"BRCA2\",\"FACD\",\"FANCD1\"]   \n",
       "5       False          1             [\"PLK\",\"PLK1\"]   \n",
       "6       False          2  [\"BRCA2\",\"FACD\",\"FANCD1\"]   \n",
       "7       False          1  [\"RAD51\",\"RAD51A\",\"RECA\"]   \n",
       "8       False          2  [\"BRCA2\",\"FACD\",\"FANCD1\"]   \n",
       "9       False          3                        NaN   \n",
       "10      False          4                        NaN   \n",
       "11      False          1                  [\"TREX2\"]   \n",
       "\n",
       "                            in_chains                    in_struct_asyms  \\\n",
       "0                               [\"A\"]                              [\"A\"]   \n",
       "1                               [\"X\"]                              [\"B\"]   \n",
       "2   [\"A\",\"B\",\"C\",\"D\",\"E\",\"F\",\"G\",\"H\"]  [\"A\",\"B\",\"C\",\"D\",\"E\",\"F\",\"G\",\"H\"]   \n",
       "3           [\"I\",\"J\",\"K\",\"L\",\"M\",\"N\"]          [\"I\",\"J\",\"K\",\"L\",\"M\",\"N\"]   \n",
       "4                               [\"O\"]                              [\"O\"]   \n",
       "5                           [\"A\",\"B\"]                          [\"A\",\"B\"]   \n",
       "6                           [\"C\",\"D\"]                          [\"C\",\"D\"]   \n",
       "7                               [\"A\"]                              [\"A\"]   \n",
       "8                               [\"B\"]                              [\"B\"]   \n",
       "9                               [\"L\"]                              [\"C\"]   \n",
       "10                              [\"C\"]                              [\"D\"]   \n",
       "11                          [\"A\",\"B\"]                          [\"A\",\"B\"]   \n",
       "\n",
       "    length                                    molecule_name   molecule_type  \\\n",
       "0    356.0               [\"Partner and localizer of BRCA2\"]  polypeptide(L)   \n",
       "1     19.0  [\"Breast cancer type 2 susceptibility protein\"]  polypeptide(L)   \n",
       "2    231.0    [\"DNA repair and recombination protein RadA\"]  polypeptide(L)   \n",
       "3     38.0  [\"Breast cancer type 2 susceptibility protein\"]  polypeptide(L)   \n",
       "4     15.0  [\"Breast cancer type 2 susceptibility protein\"]  polypeptide(L)   \n",
       "5    245.0        [\"Serine\\/threonine-protein kinase PLK1\"]  polypeptide(L)   \n",
       "6     17.0  [\"Breast cancer type 2 susceptibility protein\"]  polypeptide(L)   \n",
       "7    243.0           [\"DNA repair protein RAD51 homolog 1\"]  polypeptide(L)   \n",
       "8     35.0  [\"Breast cancer type 2 susceptibility protein\"]  polypeptide(L)   \n",
       "9     14.0                               [\"peptide linker\"]  polypeptide(L)   \n",
       "10     4.0           [\"ARTIFICIAL GLY-SER-MSE-GLY PEPTIDE\"]  polypeptide(L)   \n",
       "11   238.0             [\"Three prime repair exonuclease 2\"]  polypeptide(L)   \n",
       "\n",
       "    mutation_flag  number_of_copies  \\\n",
       "0             NaN                 1   \n",
       "1             NaN                 1   \n",
       "2             NaN                 8   \n",
       "3             NaN                 6   \n",
       "4             NaN                 1   \n",
       "5             NaN                 2   \n",
       "6             NaN                 2   \n",
       "7             NaN                 1   \n",
       "8             NaN                 1   \n",
       "9             NaN                 1   \n",
       "10            NaN                 1   \n",
       "11            NaN                 2   \n",
       "\n",
       "                                         pdb_sequence  \\\n",
       "0   GPHMSVEQTETAELPASDSINPGNLQLVSELKNPSGSCSVDVSAMF...   \n",
       "1                                 KADLGPISLNWFEELSSEA   \n",
       "2   MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...   \n",
       "3              KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK   \n",
       "4                                     KLNVSFSGFSTASGK   \n",
       "5   AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...   \n",
       "6                               WSSSLATPPTLSS(TPO)VLI   \n",
       "7   SEIIQITTGSKELDKLLQGGIETGSITE(MSE)FGEFRTGKTQICH...   \n",
       "8                 KEPTLLGFHTASGKKVKIAKESLDKVKNLFDEKEQ   \n",
       "9                                  TGSTGSTGSTGS(MSE)G   \n",
       "10                                           GS(MSE)G   \n",
       "11  AG(MSE)SEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSL...   \n",
       "\n",
       "          pdb_sequence_indices_with_multiple_residues  \\\n",
       "0   {\"297\":{\"three_letter_code\":\"CSD\",\"parent_chem...   \n",
       "1                                                  {}   \n",
       "2                                                  {}   \n",
       "3                                                  {}   \n",
       "4                                                  {}   \n",
       "5                                                  {}   \n",
       "6   {\"14\":{\"three_letter_code\":\"TPO\",\"parent_chem_...   \n",
       "7   {\"147\":{\"three_letter_code\":\"MSE\",\"parent_chem...   \n",
       "8                                                  {}   \n",
       "9   {\"13\":{\"three_letter_code\":\"MSE\",\"parent_chem_...   \n",
       "10  {\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...   \n",
       "11  {\"66\":{\"three_letter_code\":\"MSE\",\"parent_chem_...   \n",
       "\n",
       "         sample_preparation  \\\n",
       "0   Genetically manipulated   \n",
       "1    Synthetically obtained   \n",
       "2   Genetically manipulated   \n",
       "3   Genetically manipulated   \n",
       "4   Genetically manipulated   \n",
       "5   Genetically manipulated   \n",
       "6    Synthetically obtained   \n",
       "7   Genetically manipulated   \n",
       "8   Genetically manipulated   \n",
       "9   Genetically manipulated   \n",
       "10  Genetically manipulated   \n",
       "11  Genetically manipulated   \n",
       "\n",
       "                                             sequence  \\\n",
       "0   GPHMSVEQTETAELPASDSINPGNLQLVSELKNPSGSCSVDVSAMF...   \n",
       "1                                 KADLGPISLNWFEELSSEA   \n",
       "2   MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...   \n",
       "3              KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK   \n",
       "4                                     KLNVSFSGFSTASGK   \n",
       "5   AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...   \n",
       "6                                   WSSSLATPPTLSSTVLI   \n",
       "7   SEIIQITTGSKELDKLLQGGIETGSITEMFGEFRTGKTQICHTLAV...   \n",
       "8                 KEPTLLGFHTASGKKVKIAKESLDKVKNLFDEKEQ   \n",
       "9                                      TGSTGSTGSTGSMG   \n",
       "10                                               GSMG   \n",
       "11  AGMSEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSLENPE...   \n",
       "\n",
       "                                               source  \\\n",
       "0   [{\"mappings\":[{\"start\":{\"residue_number\":5},\"e...   \n",
       "1   [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "2   [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "3   [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "4   [{\"mappings\":[{\"start\":{\"residue_number\":5},\"e...   \n",
       "5   [{\"mappings\":[{\"start\":{\"residue_number\":7},\"e...   \n",
       "6   [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "7   [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "8   [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "9                                                  []   \n",
       "10                                                 []   \n",
       "11  [{\"mappings\":[{\"start\":{\"residue_number\":3},\"e...   \n",
       "\n",
       "                                              synonym     weight pdb_id  \\\n",
       "0                      Partner and localizer of BRCA2  39195.793   3eu7   \n",
       "1   19meric peptide from Breast cancer type 2 susc...   2107.297   3eu7   \n",
       "2           DNA repair and recombination protein RadA  25542.064   6hqu   \n",
       "3                 Breast cancer type 2 susceptibility   3980.434   6hqu   \n",
       "4                 Breast cancer type 2 susceptibility   1531.710   6hqu   \n",
       "5                Serine/threonine-protein kinase PLK1  28193.098   6gy2   \n",
       "6                             Phosphopeptide of BRCA2   1839.975   6gy2   \n",
       "7                  DNA repair protein RAD51 homolog 1  26855.346   1n0w   \n",
       "8         Breast cancer type 2 susceptibility protein   3966.598   1n0w   \n",
       "9                                      peptide linker   1234.091   1n0w   \n",
       "10                 ARTIFICIAL GLY-SER-MSE-GLY PEPTIDE    397.287   1n0w   \n",
       "11                   Three prime repair exonuclease 2  26224.346   1y97   \n",
       "\n",
       "    protein_entities_count  \n",
       "0                      2.0  \n",
       "1                      2.0  \n",
       "2                      3.0  \n",
       "3                      3.0  \n",
       "4                      3.0  \n",
       "5                      2.0  \n",
       "6                      2.0  \n",
       "7                      4.0  \n",
       "8                      4.0  \n",
       "9                      4.0  \n",
       "10                     4.0  \n",
       "11                     1.0  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pro_full_dfrm['protein_entities_count'] = np.nan\n",
    "for i, j in pro_full_dfrm.groupby('pdb_id'):\n",
    "    cur_index = j.index\n",
    "    pro_full_dfrm.loc[cur_index, 'protein_entities_count'] = len(j)\n",
    "pro_full_dfrm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* pure mo\n",
    "    * 1 entity, 1 chain✅\n",
    "* cleaned mo\n",
    "    * 1 entity, 1 chain, n cleaned chains✅\n",
    "    * cleaned 1 entity, 1 chain, n cleaned chains✅\n",
    "* pure ho\n",
    "    * 1 entity, n chains✅\n",
    "    * n entities, n chains, 1 protein, almost same range\n",
    "* cleaned ho\n",
    "    * cleaned 1 entity, n chains✅\n",
    "    * cleaned n entities, n chains, 1 protein, almost same range\n",
    "* pure he\n",
    "    * n entities, n chains, diff protein\n",
    "* cleaned he\n",
    "    * n entities, n chains, diff protein\n",
    "\n",
    "```json\n",
    "{\n",
    "    \"3eu7\": {\n",
    "        \"Q86YC2\": {\n",
    "            1: {\n",
    "                \"A\": \"range\"\n",
    "            }\n",
    "        },\n",
    "        \"P51587\": {\n",
    "            2: {\n",
    "                \"X\": \"range\"\n",
    "            }\n",
    "        }\n",
    "    }\n",
    "}\n",
    "```\n",
    "\n",
    "```json\n",
    "{\n",
    "    \"3eu7\": {\n",
    "        1: {\n",
    "            \"A\": [\"Q86YC2\"],\n",
    "        }\n",
    "        ,\n",
    "        2: {\n",
    "            \"X\": [\"P51587\"],\n",
    "        }\n",
    "    }\n",
    "}\n",
    "```\n",
    "\n",
    "```json\n",
    "{\n",
    "    \"3eu7\": {\n",
    "        1: {\n",
    "            \"A\": {\n",
    "                \"observed_res_count\": 313,\n",
    "                \"observed_modified_res_count\": 1\n",
    "            }\n",
    "        }\n",
    "        ,\n",
    "        2: {\n",
    "            \"X\": {\n",
    "                \"observed_res_count\": 14,\n",
    "                \"observed_modified_res_count\": 0\n",
    "            }\n",
    "        }\n",
    "    }\n",
    "}\n",
    "```\n",
    "\n",
    "```py\n",
    "def traverse(data, pdb):\n",
    "    cleaned = 0\n",
    "    for index, (entity_id, entity) in enumerate(data[pdb].items()):\n",
    "        for inner_index, (chain_id, chain) in enumerate(entity.items()):\n",
    "            if chain['observed_res_count'] - chain['observed_modified_res_count'] < 50:\n",
    "                cleaned += 1\n",
    "    return index, inner_index, cleaned\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('1flt', 4, 0, [(1, 'V', 95), (1, 'W', 98), (2, 'X', 95), (2, 'Y', 94)], 2)\n"
     ]
    }
   ],
   "source": [
    "def traverse(data, cols, cutoff=50):\n",
    "    observed_res_count, observed_modified_res_count = cols\n",
    "    chains_lyst = []\n",
    "    for pdb in data:\n",
    "        count = 0\n",
    "        cleaned = 0\n",
    "        for entity_id, entity in data[pdb].items():\n",
    "            for chain_id, chain in entity.items():\n",
    "                atom_len = chain[observed_res_count] - chain[observed_modified_res_count]\n",
    "                if atom_len < cutoff:\n",
    "                    cleaned += 1\n",
    "                else:\n",
    "                    chains_lyst.append((int(entity_id), chain_id, atom_len))\n",
    "                count += 1\n",
    "        \n",
    "        yield pdb, count, cleaned, chains_lyst, len(set(entity_id for entity_id, _, _ in chains_lyst))\n",
    "\n",
    "data = {\n",
    "  \"1flt\": {\n",
    "    \"1\": {\n",
    "      \"V\": {\n",
    "        \"ob_res\": 95,\n",
    "        \"ob_moded_res\": 0\n",
    "      },\n",
    "      \"W\": {\n",
    "        \"ob_res\": 98,\n",
    "        \"ob_moded_res\": 0\n",
    "      }\n",
    "    },\n",
    "    \"2\": {\n",
    "      \"X\": {\n",
    "        \"ob_res\": 95,\n",
    "        \"ob_moded_res\": 0\n",
    "      },\n",
    "      \"Y\": {\n",
    "        \"ob_res\": 94,\n",
    "        \"ob_moded_res\": 0\n",
    "      }\n",
    "    }\n",
    "  }\n",
    "}\n",
    "for i in traverse(data, ('ob_res', 'ob_moded_res')):\n",
    "    print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GPHMSVEQTETAELPASDSINPGNLQLVSELKNPSGSCSVDVSAMFWERAGCKEPCIITACEDVVSLWKALDAWQWEKLYTWHFAEVPVLQIVPVPDVYNLVCVALGNLEIREIRALFCSSDDESEKQVLLKSGNIKAVLGLTKRRLVSSSGTLSDQQVEVMTFAEDGGGKENQFLMPPEETILTFAEVQGMQEALLGTTIMNNIVIWNLKTGQLLKKMHIDDSYQASVCHKAYSEMGLLFIVLSHPCAKESESLRSPVFQLIVINPKTTLSVGVMLYCLPPGQAGRFLEGDVKDH(CSD)AAAILTSGTIAIWDLLLGQCTALLPPVSDQHWSFVKWSGTDSHLLAGQKDGNIFVYHYS\n",
      "\n",
      "GPHMSVEQTETAELPASDSINPGNLQLVSELKNPSGSCSVDVSAMFWERAGCKEPCIITACEDVVSLWKALDAWQWEKLYTWHFAEVPVLQIVPVPDVYNLVCVALGNLEIREIRALFCSSDDESEKQVLLKSGNIKAVLGLTKRRLVSSSGTLSDQQVEVMTFAEDGGGKENQFLMPPEETILTFAEVQGMQEALLGTTIMNNIVIWNLKTGQLLKKMHIDDSYQASVCHKAYSEMGLLFIVLSHPCAKESESLRSPVFQLIVINPKTTLSVGVMLYCLPPGQAGRFLEGDVKDHCAAAILTSGTIAIWDLLLGQCTALLPPVSDQHWSFVKWSGTDSHLLAGQKDGNIFVYHYS\n"
     ]
    }
   ],
   "source": [
    "print(dfrm3.loc[0, 'pdb_sequence'])\n",
    "print()\n",
    "print(dfrm3.loc[0, 'sequence'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ca_p_only</th>\n",
       "      <th>entity_id</th>\n",
       "      <th>gene_name</th>\n",
       "      <th>in_chains</th>\n",
       "      <th>in_struct_asyms</th>\n",
       "      <th>length</th>\n",
       "      <th>molecule_name</th>\n",
       "      <th>molecule_type</th>\n",
       "      <th>mutation_flag</th>\n",
       "      <th>number_of_copies</th>\n",
       "      <th>pdb_sequence</th>\n",
       "      <th>pdb_sequence_indices_with_multiple_residues</th>\n",
       "      <th>sample_preparation</th>\n",
       "      <th>sequence</th>\n",
       "      <th>source</th>\n",
       "      <th>synonym</th>\n",
       "      <th>weight</th>\n",
       "      <th>pdb_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>[\"FANCN\",\"PALB2\"]</td>\n",
       "      <td>[\"A\"]</td>\n",
       "      <td>[\"A\"]</td>\n",
       "      <td>356.0</td>\n",
       "      <td>[\"Partner and localizer of BRCA2\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>GPHMSVEQTETAELPASDSINPGNLQLVSELKNPSGSCSVDVSAMF...</td>\n",
       "      <td>{\"297\":{\"three_letter_code\":\"CSD\",\"parent_chem...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>GPHMSVEQTETAELPASDSINPGNLQLVSELKNPSGSCSVDVSAMF...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":5},\"e...</td>\n",
       "      <td>Partner and localizer of BRCA2</td>\n",
       "      <td>39195.793</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>2</td>\n",
       "      <td>[\"BRCA2\",\"FACD\",\"FANCD1\"]</td>\n",
       "      <td>[\"X\"]</td>\n",
       "      <td>[\"B\"]</td>\n",
       "      <td>19.0</td>\n",
       "      <td>[\"Breast cancer type 2 susceptibility protein\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>KADLGPISLNWFEELSSEA</td>\n",
       "      <td>{}</td>\n",
       "      <td>Synthetically obtained</td>\n",
       "      <td>KADLGPISLNWFEELSSEA</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>19meric peptide from Breast cancer type 2 susc...</td>\n",
       "      <td>2107.297</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"A\",\"B\",\"C\",\"D\",\"E\",\"F\",\"G\",\"H\"]</td>\n",
       "      <td>[\"A\",\"B\",\"C\",\"D\",\"E\",\"F\",\"G\",\"H\"]</td>\n",
       "      <td>231.0</td>\n",
       "      <td>[\"DNA repair and recombination protein RadA\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>DNA repair and recombination protein RadA</td>\n",
       "      <td>25542.064</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>False</td>\n",
       "      <td>2</td>\n",
       "      <td>[\"BRCA2\",\"FACD\",\"FANCD1\"]</td>\n",
       "      <td>[\"I\",\"J\",\"K\",\"L\",\"M\",\"N\"]</td>\n",
       "      <td>[\"I\",\"J\",\"K\",\"L\",\"M\",\"N\"]</td>\n",
       "      <td>38.0</td>\n",
       "      <td>[\"Breast cancer type 2 susceptibility protein\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6</td>\n",
       "      <td>KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>Breast cancer type 2 susceptibility</td>\n",
       "      <td>3980.434</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>False</td>\n",
       "      <td>3</td>\n",
       "      <td>[\"BRCA2\",\"FACD\",\"FANCD1\"]</td>\n",
       "      <td>[\"O\"]</td>\n",
       "      <td>[\"O\"]</td>\n",
       "      <td>15.0</td>\n",
       "      <td>[\"Breast cancer type 2 susceptibility protein\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>KLNVSFSGFSTASGK</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>KLNVSFSGFSTASGK</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":5},\"e...</td>\n",
       "      <td>Breast cancer type 2 susceptibility</td>\n",
       "      <td>1531.710</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>[\"PLK\",\"PLK1\"]</td>\n",
       "      <td>[\"A\",\"B\"]</td>\n",
       "      <td>[\"A\",\"B\"]</td>\n",
       "      <td>245.0</td>\n",
       "      <td>[\"Serine\\/threonine-protein kinase PLK1\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":7},\"e...</td>\n",
       "      <td>Serine/threonine-protein kinase PLK1</td>\n",
       "      <td>28193.098</td>\n",
       "      <td>6gy2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>False</td>\n",
       "      <td>2</td>\n",
       "      <td>[\"BRCA2\",\"FACD\",\"FANCD1\"]</td>\n",
       "      <td>[\"C\",\"D\"]</td>\n",
       "      <td>[\"C\",\"D\"]</td>\n",
       "      <td>17.0</td>\n",
       "      <td>[\"Breast cancer type 2 susceptibility protein\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>WSSSLATPPTLSS(TPO)VLI</td>\n",
       "      <td>{\"14\":{\"three_letter_code\":\"TPO\",\"parent_chem_...</td>\n",
       "      <td>Synthetically obtained</td>\n",
       "      <td>WSSSLATPPTLSSTVLI</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>Phosphopeptide of BRCA2</td>\n",
       "      <td>1839.975</td>\n",
       "      <td>6gy2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>[\"RAD51\",\"RAD51A\",\"RECA\"]</td>\n",
       "      <td>[\"A\"]</td>\n",
       "      <td>[\"A\"]</td>\n",
       "      <td>243.0</td>\n",
       "      <td>[\"DNA repair protein RAD51 homolog 1\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>SEIIQITTGSKELDKLLQGGIETGSITE(MSE)FGEFRTGKTQICH...</td>\n",
       "      <td>{\"29\":{\"three_letter_code\":\"MSE\",\"parent_chem_...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>SEIIQITTGSKELDKLLQGGIETGSITEMFGEFRTGKTQICHTLAV...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>DNA repair protein RAD51 homolog 1</td>\n",
       "      <td>26855.346</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>False</td>\n",
       "      <td>2</td>\n",
       "      <td>[\"BRCA2\",\"FACD\",\"FANCD1\"]</td>\n",
       "      <td>[\"B\"]</td>\n",
       "      <td>[\"B\"]</td>\n",
       "      <td>35.0</td>\n",
       "      <td>[\"Breast cancer type 2 susceptibility protein\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>KEPTLLGFHTASGKKVKIAKESLDKVKNLFDEKEQ</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>KEPTLLGFHTASGKKVKIAKESLDKVKNLFDEKEQ</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>Breast cancer type 2 susceptibility protein</td>\n",
       "      <td>3966.598</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>False</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"L\"]</td>\n",
       "      <td>[\"C\"]</td>\n",
       "      <td>14.0</td>\n",
       "      <td>[\"peptide linker\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>TGSTGSTGSTGS(MSE)G</td>\n",
       "      <td>{\"13\":{\"three_letter_code\":\"MSE\",\"parent_chem_...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>TGSTGSTGSTGSMG</td>\n",
       "      <td>[]</td>\n",
       "      <td>peptide linker</td>\n",
       "      <td>1234.091</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>False</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>[\"C\"]</td>\n",
       "      <td>[\"D\"]</td>\n",
       "      <td>4.0</td>\n",
       "      <td>[\"ARTIFICIAL GLY-SER-MSE-GLY PEPTIDE\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>GS(MSE)G</td>\n",
       "      <td>{\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>GSMG</td>\n",
       "      <td>[]</td>\n",
       "      <td>ARTIFICIAL GLY-SER-MSE-GLY PEPTIDE</td>\n",
       "      <td>397.287</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>False</td>\n",
       "      <td>1</td>\n",
       "      <td>[\"TREX2\"]</td>\n",
       "      <td>[\"A\",\"B\"]</td>\n",
       "      <td>[\"A\",\"B\"]</td>\n",
       "      <td>238.0</td>\n",
       "      <td>[\"Three prime repair exonuclease 2\"]</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>AG(MSE)SEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSL...</td>\n",
       "      <td>{\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>AGMSEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSLENPE...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":3},\"e...</td>\n",
       "      <td>Three prime repair exonuclease 2</td>\n",
       "      <td>26224.346</td>\n",
       "      <td>1y97</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    ca_p_only  entity_id                  gene_name  \\\n",
       "0       False          1          [\"FANCN\",\"PALB2\"]   \n",
       "1       False          2  [\"BRCA2\",\"FACD\",\"FANCD1\"]   \n",
       "4       False          1                        NaN   \n",
       "5       False          2  [\"BRCA2\",\"FACD\",\"FANCD1\"]   \n",
       "6       False          3  [\"BRCA2\",\"FACD\",\"FANCD1\"]   \n",
       "10      False          1             [\"PLK\",\"PLK1\"]   \n",
       "11      False          2  [\"BRCA2\",\"FACD\",\"FANCD1\"]   \n",
       "14      False          1  [\"RAD51\",\"RAD51A\",\"RECA\"]   \n",
       "15      False          2  [\"BRCA2\",\"FACD\",\"FANCD1\"]   \n",
       "16      False          3                        NaN   \n",
       "17      False          4                        NaN   \n",
       "22      False          1                  [\"TREX2\"]   \n",
       "\n",
       "                            in_chains                    in_struct_asyms  \\\n",
       "0                               [\"A\"]                              [\"A\"]   \n",
       "1                               [\"X\"]                              [\"B\"]   \n",
       "4   [\"A\",\"B\",\"C\",\"D\",\"E\",\"F\",\"G\",\"H\"]  [\"A\",\"B\",\"C\",\"D\",\"E\",\"F\",\"G\",\"H\"]   \n",
       "5           [\"I\",\"J\",\"K\",\"L\",\"M\",\"N\"]          [\"I\",\"J\",\"K\",\"L\",\"M\",\"N\"]   \n",
       "6                               [\"O\"]                              [\"O\"]   \n",
       "10                          [\"A\",\"B\"]                          [\"A\",\"B\"]   \n",
       "11                          [\"C\",\"D\"]                          [\"C\",\"D\"]   \n",
       "14                              [\"A\"]                              [\"A\"]   \n",
       "15                              [\"B\"]                              [\"B\"]   \n",
       "16                              [\"L\"]                              [\"C\"]   \n",
       "17                              [\"C\"]                              [\"D\"]   \n",
       "22                          [\"A\",\"B\"]                          [\"A\",\"B\"]   \n",
       "\n",
       "    length                                    molecule_name   molecule_type  \\\n",
       "0    356.0               [\"Partner and localizer of BRCA2\"]  polypeptide(L)   \n",
       "1     19.0  [\"Breast cancer type 2 susceptibility protein\"]  polypeptide(L)   \n",
       "4    231.0    [\"DNA repair and recombination protein RadA\"]  polypeptide(L)   \n",
       "5     38.0  [\"Breast cancer type 2 susceptibility protein\"]  polypeptide(L)   \n",
       "6     15.0  [\"Breast cancer type 2 susceptibility protein\"]  polypeptide(L)   \n",
       "10   245.0        [\"Serine\\/threonine-protein kinase PLK1\"]  polypeptide(L)   \n",
       "11    17.0  [\"Breast cancer type 2 susceptibility protein\"]  polypeptide(L)   \n",
       "14   243.0           [\"DNA repair protein RAD51 homolog 1\"]  polypeptide(L)   \n",
       "15    35.0  [\"Breast cancer type 2 susceptibility protein\"]  polypeptide(L)   \n",
       "16    14.0                               [\"peptide linker\"]  polypeptide(L)   \n",
       "17     4.0           [\"ARTIFICIAL GLY-SER-MSE-GLY PEPTIDE\"]  polypeptide(L)   \n",
       "22   238.0             [\"Three prime repair exonuclease 2\"]  polypeptide(L)   \n",
       "\n",
       "    mutation_flag  number_of_copies  \\\n",
       "0             NaN                 1   \n",
       "1             NaN                 1   \n",
       "4             NaN                 8   \n",
       "5             NaN                 6   \n",
       "6             NaN                 1   \n",
       "10            NaN                 2   \n",
       "11            NaN                 2   \n",
       "14            NaN                 1   \n",
       "15            NaN                 1   \n",
       "16            NaN                 1   \n",
       "17            NaN                 1   \n",
       "22            NaN                 2   \n",
       "\n",
       "                                         pdb_sequence  \\\n",
       "0   GPHMSVEQTETAELPASDSINPGNLQLVSELKNPSGSCSVDVSAMF...   \n",
       "1                                 KADLGPISLNWFEELSSEA   \n",
       "4   MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...   \n",
       "5              KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK   \n",
       "6                                     KLNVSFSGFSTASGK   \n",
       "10  AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...   \n",
       "11                              WSSSLATPPTLSS(TPO)VLI   \n",
       "14  SEIIQITTGSKELDKLLQGGIETGSITE(MSE)FGEFRTGKTQICH...   \n",
       "15                KEPTLLGFHTASGKKVKIAKESLDKVKNLFDEKEQ   \n",
       "16                                 TGSTGSTGSTGS(MSE)G   \n",
       "17                                           GS(MSE)G   \n",
       "22  AG(MSE)SEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSL...   \n",
       "\n",
       "          pdb_sequence_indices_with_multiple_residues  \\\n",
       "0   {\"297\":{\"three_letter_code\":\"CSD\",\"parent_chem...   \n",
       "1                                                  {}   \n",
       "4                                                  {}   \n",
       "5                                                  {}   \n",
       "6                                                  {}   \n",
       "10                                                 {}   \n",
       "11  {\"14\":{\"three_letter_code\":\"TPO\",\"parent_chem_...   \n",
       "14  {\"29\":{\"three_letter_code\":\"MSE\",\"parent_chem_...   \n",
       "15                                                 {}   \n",
       "16  {\"13\":{\"three_letter_code\":\"MSE\",\"parent_chem_...   \n",
       "17  {\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...   \n",
       "22  {\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...   \n",
       "\n",
       "         sample_preparation  \\\n",
       "0   Genetically manipulated   \n",
       "1    Synthetically obtained   \n",
       "4   Genetically manipulated   \n",
       "5   Genetically manipulated   \n",
       "6   Genetically manipulated   \n",
       "10  Genetically manipulated   \n",
       "11   Synthetically obtained   \n",
       "14  Genetically manipulated   \n",
       "15  Genetically manipulated   \n",
       "16  Genetically manipulated   \n",
       "17  Genetically manipulated   \n",
       "22  Genetically manipulated   \n",
       "\n",
       "                                             sequence  \\\n",
       "0   GPHMSVEQTETAELPASDSINPGNLQLVSELKNPSGSCSVDVSAMF...   \n",
       "1                                 KADLGPISLNWFEELSSEA   \n",
       "4   MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...   \n",
       "5              KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK   \n",
       "6                                     KLNVSFSGFSTASGK   \n",
       "10  AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...   \n",
       "11                                  WSSSLATPPTLSSTVLI   \n",
       "14  SEIIQITTGSKELDKLLQGGIETGSITEMFGEFRTGKTQICHTLAV...   \n",
       "15                KEPTLLGFHTASGKKVKIAKESLDKVKNLFDEKEQ   \n",
       "16                                     TGSTGSTGSTGSMG   \n",
       "17                                               GSMG   \n",
       "22  AGMSEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSLENPE...   \n",
       "\n",
       "                                               source  \\\n",
       "0   [{\"mappings\":[{\"start\":{\"residue_number\":5},\"e...   \n",
       "1   [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "4   [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "5   [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "6   [{\"mappings\":[{\"start\":{\"residue_number\":5},\"e...   \n",
       "10  [{\"mappings\":[{\"start\":{\"residue_number\":7},\"e...   \n",
       "11  [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "14  [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "15  [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "16                                                 []   \n",
       "17                                                 []   \n",
       "22  [{\"mappings\":[{\"start\":{\"residue_number\":3},\"e...   \n",
       "\n",
       "                                              synonym     weight pdb_id  \n",
       "0                      Partner and localizer of BRCA2  39195.793   3eu7  \n",
       "1   19meric peptide from Breast cancer type 2 susc...   2107.297   3eu7  \n",
       "4           DNA repair and recombination protein RadA  25542.064   6hqu  \n",
       "5                 Breast cancer type 2 susceptibility   3980.434   6hqu  \n",
       "6                 Breast cancer type 2 susceptibility   1531.710   6hqu  \n",
       "10               Serine/threonine-protein kinase PLK1  28193.098   6gy2  \n",
       "11                            Phosphopeptide of BRCA2   1839.975   6gy2  \n",
       "14                 DNA repair protein RAD51 homolog 1  26855.346   1n0w  \n",
       "15        Breast cancer type 2 susceptibility protein   3966.598   1n0w  \n",
       "16                                     peptide linker   1234.091   1n0w  \n",
       "17                 ARTIFICIAL GLY-SER-MSE-GLY PEPTIDE    397.287   1n0w  \n",
       "22                   Three prime repair exonuclease 2  26224.346   1y97  "
      ]
     },
     "execution_count": 129,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# len_1 = len(dfrm3.loc[0, 'pdb_sequence'])\n",
    "# len_2 = len(dfrm3.loc[0, 'sequence'])\n",
    "# print(f\"SEQRES(with modified res id) -> {len_1}, SEQRES -> {len_2}\")\n",
    "dfrm3[dfrm3.molecule_type.isin(['polypeptide(L)', 'polypeptide(D)'])]# [['pdb_id', 'entity_id']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pdb_id</th>\n",
       "      <th>entity_id</th>\n",
       "      <th>in_chains</th>\n",
       "      <th>pdb_sequence_indices_with_multiple_residues</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3eu7</td>\n",
       "      <td>1</td>\n",
       "      <td>[\"A\"]</td>\n",
       "      <td>{\"297\":{\"three_letter_code\":\"CSD\",\"parent_chem...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>6gy2</td>\n",
       "      <td>2</td>\n",
       "      <td>[\"C\",\"D\"]</td>\n",
       "      <td>{\"14\":{\"three_letter_code\":\"TPO\",\"parent_chem_...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>1n0w</td>\n",
       "      <td>1</td>\n",
       "      <td>[\"A\"]</td>\n",
       "      <td>{\"29\":{\"three_letter_code\":\"MSE\",\"parent_chem_...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>1n0w</td>\n",
       "      <td>3</td>\n",
       "      <td>[\"L\"]</td>\n",
       "      <td>{\"13\":{\"three_letter_code\":\"MSE\",\"parent_chem_...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>1n0w</td>\n",
       "      <td>4</td>\n",
       "      <td>[\"C\"]</td>\n",
       "      <td>{\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>1y97</td>\n",
       "      <td>1</td>\n",
       "      <td>[\"A\",\"B\"]</td>\n",
       "      <td>{\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pdb_id  entity_id  in_chains  \\\n",
       "0    3eu7          1      [\"A\"]   \n",
       "11   6gy2          2  [\"C\",\"D\"]   \n",
       "14   1n0w          1      [\"A\"]   \n",
       "16   1n0w          3      [\"L\"]   \n",
       "17   1n0w          4      [\"C\"]   \n",
       "22   1y97          1  [\"A\",\"B\"]   \n",
       "\n",
       "          pdb_sequence_indices_with_multiple_residues  \n",
       "0   {\"297\":{\"three_letter_code\":\"CSD\",\"parent_chem...  \n",
       "11  {\"14\":{\"three_letter_code\":\"TPO\",\"parent_chem_...  \n",
       "14  {\"29\":{\"three_letter_code\":\"MSE\",\"parent_chem_...  \n",
       "16  {\"13\":{\"three_letter_code\":\"MSE\",\"parent_chem_...  \n",
       "17  {\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...  \n",
       "22  {\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...  "
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the identifying columns of every row whose\n",
    "# 'pdb_sequence_indices_with_multiple_residues' value is neither '{}' nor missing.\n",
    "(\n",
    "    dfrm3\n",
    "    .loc[\n",
    "        dfrm3['pdb_sequence_indices_with_multiple_residues'].ne('{}'),\n",
    "        ['pdb_id', 'entity_id', 'in_chains', 'pdb_sequence_indices_with_multiple_residues'],\n",
    "    ]\n",
    "    # NaN != '{}' evaluates to True, so missing values pass the mask above and are removed here.\n",
    "    .dropna(subset=['pdb_sequence_indices_with_multiple_residues'])\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3eu7 1\n",
      "6gy2 1\n",
      "1n0w 8\n",
      "1n0w 1\n",
      "1n0w 1\n",
      "1y97 3\n"
     ]
    }
   ],
   "source": [
    "# Print, for every entity with a non-empty 'pdb_sequence_indices_with_multiple_residues'\n",
    "# value, its pdb_id and the number of keys in that JSON object.\n",
    "# The row labels are selected from the data (value present and not '{}') instead of being\n",
    "# hard-coded, so the cell keeps working when dfrm3 is rebuilt from a different set of entries.\n",
    "modres_col = 'pdb_sequence_indices_with_multiple_residues'\n",
    "has_modres = dfrm3[modres_col].notna() & dfrm3[modres_col].ne('{}')\n",
    "for i in dfrm3.loc[has_modres].index:\n",
    "    # Each cell holds a JSON object serialised as a string; decode it before counting.\n",
    "    res = json.loads(dfrm3.loc[i, modres_col])\n",
    "    print(dfrm3.loc[i, 'pdb_id'], len(res))\n",
    "    # Uncomment to dump the decoded records themselves:\n",
    "    # pprint(res)\n",
    "    # print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['UniProt', 'chain_id', 'end', 'entity_id', 'identifier', 'identity',\n",
      "       'is_canonical', 'name', 'pdb_id', 'start', 'struct_asym_id',\n",
      "       'sifts_pdb_range', 'sifts_unp_range', 'Entry', 'pdb_GAP_list',\n",
      "       'unp_GAP_list', 'var_list', 'delete', 'var_0_count', 'unp_GAP_0_count',\n",
      "       'group_info', 'sifts_unp_pdb_var', 'sifts_range_tage'],\n",
      "      dtype='object')\n",
      "Index(['ca_p_only', 'entity_id', 'gene_name', 'in_chains', 'in_struct_asyms',\n",
      "       'length', 'molecule_name', 'molecule_type', 'mutation_flag',\n",
      "       'number_of_copies', 'pdb_sequence',\n",
      "       'pdb_sequence_indices_with_multiple_residues', 'sample_preparation',\n",
      "       'sequence', 'source', 'synonym', 'weight', 'pdb_id'],\n",
      "      dtype='object')\n",
      "Index(['entity_id', 'pdb_id'], dtype='object')\n"
     ]
    }
   ],
   "source": [
    "# Print the column labels of both frames, then the labels they have in common\n",
    "# (presumably the keys a merge of the two frames would use - confirm against the merge cell).\n",
    "pprint(dfrm.columns)\n",
    "pprint(dfrm3.columns)\n",
    "# Use the named set operation: `Index & Index` as set intersection is deprecated in\n",
    "# pandas 1.x and is an element-wise logical AND from pandas 2.0 onwards.\n",
    "pprint(dfrm.columns.intersection(dfrm3.columns))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>UniProt</th>\n",
       "      <th>chain_id</th>\n",
       "      <th>end</th>\n",
       "      <th>entity_id</th>\n",
       "      <th>identifier</th>\n",
       "      <th>identity</th>\n",
       "      <th>is_canonical</th>\n",
       "      <th>name</th>\n",
       "      <th>pdb_id</th>\n",
       "      <th>start</th>\n",
       "      <th>...</th>\n",
       "      <th>molecule_type</th>\n",
       "      <th>mutation_flag</th>\n",
       "      <th>number_of_copies</th>\n",
       "      <th>pdb_sequence</th>\n",
       "      <th>pdb_sequence_indices_with_multiple_residues</th>\n",
       "      <th>sample_preparation</th>\n",
       "      <th>sequence</th>\n",
       "      <th>source</th>\n",
       "      <th>synonym</th>\n",
       "      <th>weight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Q86YC2</td>\n",
       "      <td>A</td>\n",
       "      <td>{\"author_residue_number\":1186,\"author_insertio...</td>\n",
       "      <td>1</td>\n",
       "      <td>PALB2_HUMAN</td>\n",
       "      <td>1.000</td>\n",
       "      <td>True</td>\n",
       "      <td>PALB2_HUMAN</td>\n",
       "      <td>3eu7</td>\n",
       "      <td>{\"author_residue_number\":null,\"author_insertio...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>GPHMSVEQTETAELPASDSINPGNLQLVSELKNPSGSCSVDVSAMF...</td>\n",
       "      <td>{\"297\":{\"three_letter_code\":\"CSD\",\"parent_chem...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>GPHMSVEQTETAELPASDSINPGNLQLVSELKNPSGSCSVDVSAMF...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":5},\"e...</td>\n",
       "      <td>Partner and localizer of BRCA2</td>\n",
       "      <td>39195.793</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>P51587</td>\n",
       "      <td>X</td>\n",
       "      <td>{\"author_residue_number\":null,\"author_insertio...</td>\n",
       "      <td>2</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>1.000</td>\n",
       "      <td>True</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>3eu7</td>\n",
       "      <td>{\"author_residue_number\":null,\"author_insertio...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>KADLGPISLNWFEELSSEA</td>\n",
       "      <td>{}</td>\n",
       "      <td>Synthetically obtained</td>\n",
       "      <td>KADLGPISLNWFEELSSEA</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>19meric peptide from Breast cancer type 2 susc...</td>\n",
       "      <td>2107.297</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Q9BQ50</td>\n",
       "      <td>A</td>\n",
       "      <td>{\"author_residue_number\":null,\"author_insertio...</td>\n",
       "      <td>1</td>\n",
       "      <td>TREX2_HUMAN</td>\n",
       "      <td>1.000</td>\n",
       "      <td>True</td>\n",
       "      <td>TREX2_HUMAN</td>\n",
       "      <td>1y97</td>\n",
       "      <td>{\"author_residue_number\":1,\"author_insertion_c...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>AG(MSE)SEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSL...</td>\n",
       "      <td>{\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>AGMSEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSLENPE...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":3},\"e...</td>\n",
       "      <td>Three prime repair exonuclease 2</td>\n",
       "      <td>26224.346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Q9BQ50</td>\n",
       "      <td>B</td>\n",
       "      <td>{\"author_residue_number\":null,\"author_insertio...</td>\n",
       "      <td>1</td>\n",
       "      <td>TREX2_HUMAN</td>\n",
       "      <td>1.000</td>\n",
       "      <td>True</td>\n",
       "      <td>TREX2_HUMAN</td>\n",
       "      <td>1y97</td>\n",
       "      <td>{\"author_residue_number\":null,\"author_insertio...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>AG(MSE)SEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSL...</td>\n",
       "      <td>{\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>AGMSEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSLENPE...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":3},\"e...</td>\n",
       "      <td>Three prime repair exonuclease 2</td>\n",
       "      <td>26224.346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Q9BQ50-1</td>\n",
       "      <td>A</td>\n",
       "      <td>{\"author_residue_number\":null,\"author_insertio...</td>\n",
       "      <td>1</td>\n",
       "      <td>TREX2_HUMAN</td>\n",
       "      <td>0.996</td>\n",
       "      <td>False</td>\n",
       "      <td>TREX2_HUMAN</td>\n",
       "      <td>1y97</td>\n",
       "      <td>{\"author_residue_number\":0,\"author_insertion_c...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>AG(MSE)SEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSL...</td>\n",
       "      <td>{\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>AGMSEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSLENPE...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":3},\"e...</td>\n",
       "      <td>Three prime repair exonuclease 2</td>\n",
       "      <td>26224.346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Q9BQ50-1</td>\n",
       "      <td>B</td>\n",
       "      <td>{\"author_residue_number\":null,\"author_insertio...</td>\n",
       "      <td>1</td>\n",
       "      <td>TREX2_HUMAN</td>\n",
       "      <td>0.996</td>\n",
       "      <td>False</td>\n",
       "      <td>TREX2_HUMAN</td>\n",
       "      <td>1y97</td>\n",
       "      <td>{\"author_residue_number\":null,\"author_insertio...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>AG(MSE)SEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSL...</td>\n",
       "      <td>{\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>AGMSEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSLENPE...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":3},\"e...</td>\n",
       "      <td>Three prime repair exonuclease 2</td>\n",
       "      <td>26224.346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Q06609-3</td>\n",
       "      <td>A</td>\n",
       "      <td>{\"author_residue_number\":258,\"author_insertion...</td>\n",
       "      <td>1</td>\n",
       "      <td>RAD51_HUMAN</td>\n",
       "      <td>1.000</td>\n",
       "      <td>False</td>\n",
       "      <td>RAD51_HUMAN</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>{\"author_residue_number\":null,\"author_insertio...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>SEIIQITTGSKELDKLLQGGIETGSITE(MSE)FGEFRTGKTQICH...</td>\n",
       "      <td>{\"29\":{\"three_letter_code\":\"MSE\",\"parent_chem_...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>SEIIQITTGSKELDKLLQGGIETGSITEMFGEFRTGKTQICHTLAV...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>DNA repair protein RAD51 homolog 1</td>\n",
       "      <td>26855.346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Q06609-2</td>\n",
       "      <td>A</td>\n",
       "      <td>{\"author_residue_number\":339,\"author_insertion...</td>\n",
       "      <td>1</td>\n",
       "      <td>RAD51_HUMAN</td>\n",
       "      <td>0.753</td>\n",
       "      <td>False</td>\n",
       "      <td>RAD51_HUMAN</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>{\"author_residue_number\":156,\"author_insertion...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>SEIIQITTGSKELDKLLQGGIETGSITE(MSE)FGEFRTGKTQICH...</td>\n",
       "      <td>{\"29\":{\"three_letter_code\":\"MSE\",\"parent_chem_...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>SEIIQITTGSKELDKLLQGGIETGSITEMFGEFRTGKTQICHTLAV...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>DNA repair protein RAD51 homolog 1</td>\n",
       "      <td>26855.346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Q06609</td>\n",
       "      <td>A</td>\n",
       "      <td>{\"author_residue_number\":339,\"author_insertion...</td>\n",
       "      <td>1</td>\n",
       "      <td>RAD51_HUMAN</td>\n",
       "      <td>1.000</td>\n",
       "      <td>True</td>\n",
       "      <td>RAD51_HUMAN</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>{\"author_residue_number\":null,\"author_insertio...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>SEIIQITTGSKELDKLLQGGIETGSITE(MSE)FGEFRTGKTQICH...</td>\n",
       "      <td>{\"29\":{\"three_letter_code\":\"MSE\",\"parent_chem_...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>SEIIQITTGSKELDKLLQGGIETGSITEMFGEFRTGKTQICHTLAV...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>DNA repair protein RAD51 homolog 1</td>\n",
       "      <td>26855.346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Q06609-4</td>\n",
       "      <td>A</td>\n",
       "      <td>{\"author_residue_number\":339,\"author_insertion...</td>\n",
       "      <td>1</td>\n",
       "      <td>RAD51_HUMAN</td>\n",
       "      <td>1.000</td>\n",
       "      <td>False</td>\n",
       "      <td>RAD51_HUMAN</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>{\"author_residue_number\":115,\"author_insertion...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>SEIIQITTGSKELDKLLQGGIETGSITE(MSE)FGEFRTGKTQICH...</td>\n",
       "      <td>{\"29\":{\"three_letter_code\":\"MSE\",\"parent_chem_...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>SEIIQITTGSKELDKLLQGGIETGSITEMFGEFRTGKTQICHTLAV...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>DNA repair protein RAD51 homolog 1</td>\n",
       "      <td>26855.346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>P51587</td>\n",
       "      <td>B</td>\n",
       "      <td>{\"author_residue_number\":1551,\"author_insertio...</td>\n",
       "      <td>2</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>1.000</td>\n",
       "      <td>True</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>{\"author_residue_number\":null,\"author_insertio...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>KEPTLLGFHTASGKKVKIAKESLDKVKNLFDEKEQ</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>KEPTLLGFHTASGKKVKIAKESLDKVKNLFDEKEQ</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>Breast cancer type 2 susceptibility protein</td>\n",
       "      <td>3966.598</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>P53350</td>\n",
       "      <td>A</td>\n",
       "      <td>{\"author_residue_number\":null,\"author_insertio...</td>\n",
       "      <td>1</td>\n",
       "      <td>PLK1_HUMAN</td>\n",
       "      <td>0.988</td>\n",
       "      <td>True</td>\n",
       "      <td>PLK1_HUMAN</td>\n",
       "      <td>6gy2</td>\n",
       "      <td>{\"author_residue_number\":null,\"author_insertio...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":7},\"e...</td>\n",
       "      <td>Serine/threonine-protein kinase PLK1</td>\n",
       "      <td>28193.098</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>P53350</td>\n",
       "      <td>B</td>\n",
       "      <td>{\"author_residue_number\":null,\"author_insertio...</td>\n",
       "      <td>1</td>\n",
       "      <td>PLK1_HUMAN</td>\n",
       "      <td>0.988</td>\n",
       "      <td>True</td>\n",
       "      <td>PLK1_HUMAN</td>\n",
       "      <td>6gy2</td>\n",
       "      <td>{\"author_residue_number\":null,\"author_insertio...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":7},\"e...</td>\n",
       "      <td>Serine/threonine-protein kinase PLK1</td>\n",
       "      <td>28193.098</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>P51587</td>\n",
       "      <td>C</td>\n",
       "      <td>{\"author_residue_number\":210,\"author_insertion...</td>\n",
       "      <td>2</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>1.000</td>\n",
       "      <td>True</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>6gy2</td>\n",
       "      <td>{\"author_residue_number\":null,\"author_insertio...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>WSSSLATPPTLSS(TPO)VLI</td>\n",
       "      <td>{\"14\":{\"three_letter_code\":\"TPO\",\"parent_chem_...</td>\n",
       "      <td>Synthetically obtained</td>\n",
       "      <td>WSSSLATPPTLSSTVLI</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>Phosphopeptide of BRCA2</td>\n",
       "      <td>1839.975</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>P51587</td>\n",
       "      <td>D</td>\n",
       "      <td>{\"author_residue_number\":210,\"author_insertion...</td>\n",
       "      <td>2</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>1.000</td>\n",
       "      <td>True</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>6gy2</td>\n",
       "      <td>{\"author_residue_number\":null,\"author_insertio...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>WSSSLATPPTLSS(TPO)VLI</td>\n",
       "      <td>{\"14\":{\"three_letter_code\":\"TPO\",\"parent_chem_...</td>\n",
       "      <td>Synthetically obtained</td>\n",
       "      <td>WSSSLATPPTLSSTVLI</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>Phosphopeptide of BRCA2</td>\n",
       "      <td>1839.975</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>P51587</td>\n",
       "      <td>I</td>\n",
       "      <td>{\"author_residue_number\":2054,\"author_insertio...</td>\n",
       "      <td>2</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>0.893</td>\n",
       "      <td>True</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>6hqu</td>\n",
       "      <td>{\"author_residue_number\":1226,\"author_insertio...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6</td>\n",
       "      <td>KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>Breast cancer type 2 susceptibility</td>\n",
       "      <td>3980.434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>P51587</td>\n",
       "      <td>J</td>\n",
       "      <td>{\"author_residue_number\":2054,\"author_insertio...</td>\n",
       "      <td>2</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>0.893</td>\n",
       "      <td>True</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>6hqu</td>\n",
       "      <td>{\"author_residue_number\":1226,\"author_insertio...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6</td>\n",
       "      <td>KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>Breast cancer type 2 susceptibility</td>\n",
       "      <td>3980.434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>P51587</td>\n",
       "      <td>K</td>\n",
       "      <td>{\"author_residue_number\":2054,\"author_insertio...</td>\n",
       "      <td>2</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>0.893</td>\n",
       "      <td>True</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>6hqu</td>\n",
       "      <td>{\"author_residue_number\":1226,\"author_insertio...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6</td>\n",
       "      <td>KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>Breast cancer type 2 susceptibility</td>\n",
       "      <td>3980.434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>P51587</td>\n",
       "      <td>L</td>\n",
       "      <td>{\"author_residue_number\":2054,\"author_insertio...</td>\n",
       "      <td>2</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>0.893</td>\n",
       "      <td>True</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>6hqu</td>\n",
       "      <td>{\"author_residue_number\":1226,\"author_insertio...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6</td>\n",
       "      <td>KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>Breast cancer type 2 susceptibility</td>\n",
       "      <td>3980.434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>P51587</td>\n",
       "      <td>M</td>\n",
       "      <td>{\"author_residue_number\":2054,\"author_insertio...</td>\n",
       "      <td>2</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>0.893</td>\n",
       "      <td>True</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>6hqu</td>\n",
       "      <td>{\"author_residue_number\":1226,\"author_insertio...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6</td>\n",
       "      <td>KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>Breast cancer type 2 susceptibility</td>\n",
       "      <td>3980.434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>P51587</td>\n",
       "      <td>N</td>\n",
       "      <td>{\"author_residue_number\":null,\"author_insertio...</td>\n",
       "      <td>2</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>0.893</td>\n",
       "      <td>True</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>6hqu</td>\n",
       "      <td>{\"author_residue_number\":1226,\"author_insertio...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6</td>\n",
       "      <td>KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>Breast cancer type 2 susceptibility</td>\n",
       "      <td>3980.434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>P51587</td>\n",
       "      <td>O</td>\n",
       "      <td>{\"author_residue_number\":2064,\"author_insertio...</td>\n",
       "      <td>3</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>0.909</td>\n",
       "      <td>True</td>\n",
       "      <td>BRCA2_HUMAN</td>\n",
       "      <td>6hqu</td>\n",
       "      <td>{\"author_residue_number\":1230,\"author_insertio...</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>KLNVSFSGFSTASGK</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>KLNVSFSGFSTASGK</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":5},\"e...</td>\n",
       "      <td>Breast cancer type 2 susceptibility</td>\n",
       "      <td>1531.710</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6hqu</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...</td>\n",
       "      <td>{}</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...</td>\n",
       "      <td>[{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...</td>\n",
       "      <td>DNA repair and recombination protein RadA</td>\n",
       "      <td>25542.064</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>TGSTGSTGSTGS(MSE)G</td>\n",
       "      <td>{\"13\":{\"three_letter_code\":\"MSE\",\"parent_chem_...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>TGSTGSTGSTGSMG</td>\n",
       "      <td>[]</td>\n",
       "      <td>peptide linker</td>\n",
       "      <td>1234.091</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>polypeptide(L)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>GS(MSE)G</td>\n",
       "      <td>{\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...</td>\n",
       "      <td>Genetically manipulated</td>\n",
       "      <td>GSMG</td>\n",
       "      <td>[]</td>\n",
       "      <td>ARTIFICIAL GLY-SER-MSE-GLY PEPTIDE</td>\n",
       "      <td>397.287</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>25 rows × 39 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     UniProt chain_id                                                end  \\\n",
       "0     Q86YC2        A  {\"author_residue_number\":1186,\"author_insertio...   \n",
       "1     P51587        X  {\"author_residue_number\":null,\"author_insertio...   \n",
       "2     Q9BQ50        A  {\"author_residue_number\":null,\"author_insertio...   \n",
       "3     Q9BQ50        B  {\"author_residue_number\":null,\"author_insertio...   \n",
       "4   Q9BQ50-1        A  {\"author_residue_number\":null,\"author_insertio...   \n",
       "5   Q9BQ50-1        B  {\"author_residue_number\":null,\"author_insertio...   \n",
       "6   Q06609-3        A  {\"author_residue_number\":258,\"author_insertion...   \n",
       "7   Q06609-2        A  {\"author_residue_number\":339,\"author_insertion...   \n",
       "8     Q06609        A  {\"author_residue_number\":339,\"author_insertion...   \n",
       "9   Q06609-4        A  {\"author_residue_number\":339,\"author_insertion...   \n",
       "10    P51587        B  {\"author_residue_number\":1551,\"author_insertio...   \n",
       "11    P53350        A  {\"author_residue_number\":null,\"author_insertio...   \n",
       "12    P53350        B  {\"author_residue_number\":null,\"author_insertio...   \n",
       "13    P51587        C  {\"author_residue_number\":210,\"author_insertion...   \n",
       "14    P51587        D  {\"author_residue_number\":210,\"author_insertion...   \n",
       "15    P51587        I  {\"author_residue_number\":2054,\"author_insertio...   \n",
       "16    P51587        J  {\"author_residue_number\":2054,\"author_insertio...   \n",
       "17    P51587        K  {\"author_residue_number\":2054,\"author_insertio...   \n",
       "18    P51587        L  {\"author_residue_number\":2054,\"author_insertio...   \n",
       "19    P51587        M  {\"author_residue_number\":2054,\"author_insertio...   \n",
       "20    P51587        N  {\"author_residue_number\":null,\"author_insertio...   \n",
       "21    P51587        O  {\"author_residue_number\":2064,\"author_insertio...   \n",
       "22       NaN      NaN                                                NaN   \n",
       "23       NaN      NaN                                                NaN   \n",
       "24       NaN      NaN                                                NaN   \n",
       "\n",
       "    entity_id   identifier  identity is_canonical         name pdb_id  \\\n",
       "0           1  PALB2_HUMAN     1.000         True  PALB2_HUMAN   3eu7   \n",
       "1           2  BRCA2_HUMAN     1.000         True  BRCA2_HUMAN   3eu7   \n",
       "2           1  TREX2_HUMAN     1.000         True  TREX2_HUMAN   1y97   \n",
       "3           1  TREX2_HUMAN     1.000         True  TREX2_HUMAN   1y97   \n",
       "4           1  TREX2_HUMAN     0.996        False  TREX2_HUMAN   1y97   \n",
       "5           1  TREX2_HUMAN     0.996        False  TREX2_HUMAN   1y97   \n",
       "6           1  RAD51_HUMAN     1.000        False  RAD51_HUMAN   1n0w   \n",
       "7           1  RAD51_HUMAN     0.753        False  RAD51_HUMAN   1n0w   \n",
       "8           1  RAD51_HUMAN     1.000         True  RAD51_HUMAN   1n0w   \n",
       "9           1  RAD51_HUMAN     1.000        False  RAD51_HUMAN   1n0w   \n",
       "10          2  BRCA2_HUMAN     1.000         True  BRCA2_HUMAN   1n0w   \n",
       "11          1   PLK1_HUMAN     0.988         True   PLK1_HUMAN   6gy2   \n",
       "12          1   PLK1_HUMAN     0.988         True   PLK1_HUMAN   6gy2   \n",
       "13          2  BRCA2_HUMAN     1.000         True  BRCA2_HUMAN   6gy2   \n",
       "14          2  BRCA2_HUMAN     1.000         True  BRCA2_HUMAN   6gy2   \n",
       "15          2  BRCA2_HUMAN     0.893         True  BRCA2_HUMAN   6hqu   \n",
       "16          2  BRCA2_HUMAN     0.893         True  BRCA2_HUMAN   6hqu   \n",
       "17          2  BRCA2_HUMAN     0.893         True  BRCA2_HUMAN   6hqu   \n",
       "18          2  BRCA2_HUMAN     0.893         True  BRCA2_HUMAN   6hqu   \n",
       "19          2  BRCA2_HUMAN     0.893         True  BRCA2_HUMAN   6hqu   \n",
       "20          2  BRCA2_HUMAN     0.893         True  BRCA2_HUMAN   6hqu   \n",
       "21          3  BRCA2_HUMAN     0.909         True  BRCA2_HUMAN   6hqu   \n",
       "22          1          NaN       NaN          NaN          NaN   6hqu   \n",
       "23          3          NaN       NaN          NaN          NaN   1n0w   \n",
       "24          4          NaN       NaN          NaN          NaN   1n0w   \n",
       "\n",
       "                                                start  ...   molecule_type  \\\n",
       "0   {\"author_residue_number\":null,\"author_insertio...  ...  polypeptide(L)   \n",
       "1   {\"author_residue_number\":null,\"author_insertio...  ...  polypeptide(L)   \n",
       "2   {\"author_residue_number\":1,\"author_insertion_c...  ...  polypeptide(L)   \n",
       "3   {\"author_residue_number\":null,\"author_insertio...  ...  polypeptide(L)   \n",
       "4   {\"author_residue_number\":0,\"author_insertion_c...  ...  polypeptide(L)   \n",
       "5   {\"author_residue_number\":null,\"author_insertio...  ...  polypeptide(L)   \n",
       "6   {\"author_residue_number\":null,\"author_insertio...  ...  polypeptide(L)   \n",
       "7   {\"author_residue_number\":156,\"author_insertion...  ...  polypeptide(L)   \n",
       "8   {\"author_residue_number\":null,\"author_insertio...  ...  polypeptide(L)   \n",
       "9   {\"author_residue_number\":115,\"author_insertion...  ...  polypeptide(L)   \n",
       "10  {\"author_residue_number\":null,\"author_insertio...  ...  polypeptide(L)   \n",
       "11  {\"author_residue_number\":null,\"author_insertio...  ...  polypeptide(L)   \n",
       "12  {\"author_residue_number\":null,\"author_insertio...  ...  polypeptide(L)   \n",
       "13  {\"author_residue_number\":null,\"author_insertio...  ...  polypeptide(L)   \n",
       "14  {\"author_residue_number\":null,\"author_insertio...  ...  polypeptide(L)   \n",
       "15  {\"author_residue_number\":1226,\"author_insertio...  ...  polypeptide(L)   \n",
       "16  {\"author_residue_number\":1226,\"author_insertio...  ...  polypeptide(L)   \n",
       "17  {\"author_residue_number\":1226,\"author_insertio...  ...  polypeptide(L)   \n",
       "18  {\"author_residue_number\":1226,\"author_insertio...  ...  polypeptide(L)   \n",
       "19  {\"author_residue_number\":1226,\"author_insertio...  ...  polypeptide(L)   \n",
       "20  {\"author_residue_number\":1226,\"author_insertio...  ...  polypeptide(L)   \n",
       "21  {\"author_residue_number\":1230,\"author_insertio...  ...  polypeptide(L)   \n",
       "22                                                NaN  ...  polypeptide(L)   \n",
       "23                                                NaN  ...  polypeptide(L)   \n",
       "24                                                NaN  ...  polypeptide(L)   \n",
       "\n",
       "   mutation_flag number_of_copies  \\\n",
       "0            NaN                1   \n",
       "1            NaN                1   \n",
       "2            NaN                2   \n",
       "3            NaN                2   \n",
       "4            NaN                2   \n",
       "5            NaN                2   \n",
       "6            NaN                1   \n",
       "7            NaN                1   \n",
       "8            NaN                1   \n",
       "9            NaN                1   \n",
       "10           NaN                1   \n",
       "11           NaN                2   \n",
       "12           NaN                2   \n",
       "13           NaN                2   \n",
       "14           NaN                2   \n",
       "15           NaN                6   \n",
       "16           NaN                6   \n",
       "17           NaN                6   \n",
       "18           NaN                6   \n",
       "19           NaN                6   \n",
       "20           NaN                6   \n",
       "21           NaN                1   \n",
       "22           NaN                8   \n",
       "23           NaN                1   \n",
       "24           NaN                1   \n",
       "\n",
       "                                         pdb_sequence  \\\n",
       "0   GPHMSVEQTETAELPASDSINPGNLQLVSELKNPSGSCSVDVSAMF...   \n",
       "1                                 KADLGPISLNWFEELSSEA   \n",
       "2   AG(MSE)SEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSL...   \n",
       "3   AG(MSE)SEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSL...   \n",
       "4   AG(MSE)SEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSL...   \n",
       "5   AG(MSE)SEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSL...   \n",
       "6   SEIIQITTGSKELDKLLQGGIETGSITE(MSE)FGEFRTGKTQICH...   \n",
       "7   SEIIQITTGSKELDKLLQGGIETGSITE(MSE)FGEFRTGKTQICH...   \n",
       "8   SEIIQITTGSKELDKLLQGGIETGSITE(MSE)FGEFRTGKTQICH...   \n",
       "9   SEIIQITTGSKELDKLLQGGIETGSITE(MSE)FGEFRTGKTQICH...   \n",
       "10                KEPTLLGFHTASGKKVKIAKESLDKVKNLFDEKEQ   \n",
       "11  AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...   \n",
       "12  AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...   \n",
       "13                              WSSSLATPPTLSS(TPO)VLI   \n",
       "14                              WSSSLATPPTLSS(TPO)VLI   \n",
       "15             KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK   \n",
       "16             KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK   \n",
       "17             KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK   \n",
       "18             KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK   \n",
       "19             KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK   \n",
       "20             KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK   \n",
       "21                                    KLNVSFSGFSTASGK   \n",
       "22  MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...   \n",
       "23                                 TGSTGSTGSTGS(MSE)G   \n",
       "24                                           GS(MSE)G   \n",
       "\n",
       "          pdb_sequence_indices_with_multiple_residues  \\\n",
       "0   {\"297\":{\"three_letter_code\":\"CSD\",\"parent_chem...   \n",
       "1                                                  {}   \n",
       "2   {\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...   \n",
       "3   {\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...   \n",
       "4   {\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...   \n",
       "5   {\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...   \n",
       "6   {\"29\":{\"three_letter_code\":\"MSE\",\"parent_chem_...   \n",
       "7   {\"29\":{\"three_letter_code\":\"MSE\",\"parent_chem_...   \n",
       "8   {\"29\":{\"three_letter_code\":\"MSE\",\"parent_chem_...   \n",
       "9   {\"29\":{\"three_letter_code\":\"MSE\",\"parent_chem_...   \n",
       "10                                                 {}   \n",
       "11                                                 {}   \n",
       "12                                                 {}   \n",
       "13  {\"14\":{\"three_letter_code\":\"TPO\",\"parent_chem_...   \n",
       "14  {\"14\":{\"three_letter_code\":\"TPO\",\"parent_chem_...   \n",
       "15                                                 {}   \n",
       "16                                                 {}   \n",
       "17                                                 {}   \n",
       "18                                                 {}   \n",
       "19                                                 {}   \n",
       "20                                                 {}   \n",
       "21                                                 {}   \n",
       "22                                                 {}   \n",
       "23  {\"13\":{\"three_letter_code\":\"MSE\",\"parent_chem_...   \n",
       "24  {\"3\":{\"three_letter_code\":\"MSE\",\"parent_chem_c...   \n",
       "\n",
       "         sample_preparation  \\\n",
       "0   Genetically manipulated   \n",
       "1    Synthetically obtained   \n",
       "2   Genetically manipulated   \n",
       "3   Genetically manipulated   \n",
       "4   Genetically manipulated   \n",
       "5   Genetically manipulated   \n",
       "6   Genetically manipulated   \n",
       "7   Genetically manipulated   \n",
       "8   Genetically manipulated   \n",
       "9   Genetically manipulated   \n",
       "10  Genetically manipulated   \n",
       "11  Genetically manipulated   \n",
       "12  Genetically manipulated   \n",
       "13   Synthetically obtained   \n",
       "14   Synthetically obtained   \n",
       "15  Genetically manipulated   \n",
       "16  Genetically manipulated   \n",
       "17  Genetically manipulated   \n",
       "18  Genetically manipulated   \n",
       "19  Genetically manipulated   \n",
       "20  Genetically manipulated   \n",
       "21  Genetically manipulated   \n",
       "22  Genetically manipulated   \n",
       "23  Genetically manipulated   \n",
       "24  Genetically manipulated   \n",
       "\n",
       "                                             sequence  \\\n",
       "0   GPHMSVEQTETAELPASDSINPGNLQLVSELKNPSGSCSVDVSAMF...   \n",
       "1                                 KADLGPISLNWFEELSSEA   \n",
       "2   AGMSEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSLENPE...   \n",
       "3   AGMSEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSLENPE...   \n",
       "4   AGMSEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSLENPE...   \n",
       "5   AGMSEAPRAETFVFLDLEATGLPSVEPEIAELSLFAVHRSSLENPE...   \n",
       "6   SEIIQITTGSKELDKLLQGGIETGSITEMFGEFRTGKTQICHTLAV...   \n",
       "7   SEIIQITTGSKELDKLLQGGIETGSITEMFGEFRTGKTQICHTLAV...   \n",
       "8   SEIIQITTGSKELDKLLQGGIETGSITEMFGEFRTGKTQICHTLAV...   \n",
       "9   SEIIQITTGSKELDKLLQGGIETGSITEMFGEFRTGKTQICHTLAV...   \n",
       "10                KEPTLLGFHTASGKKVKIAKESLDKVKNLFDEKEQ   \n",
       "11  AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...   \n",
       "12  AMDPEFETGEVVDCHLSDMLQQLHSVNASKPSERGLVRQEEAEDPA...   \n",
       "13                                  WSSSLATPPTLSSTVLI   \n",
       "14                                  WSSSLATPPTLSSTVLI   \n",
       "15             KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK   \n",
       "16             KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK   \n",
       "17             KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK   \n",
       "18             KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK   \n",
       "19             KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK   \n",
       "20             KLNVSTEALQKAVKLFSDIENISVNSSAFSGFSTASGK   \n",
       "21                                    KLNVSFSGFSTASGK   \n",
       "22  MATIGRISTGSKSLDKLLGGGIETQAITEVFGEFGSGKTQLAHTLA...   \n",
       "23                                     TGSTGSTGSTGSMG   \n",
       "24                                               GSMG   \n",
       "\n",
       "                                               source  \\\n",
       "0   [{\"mappings\":[{\"start\":{\"residue_number\":5},\"e...   \n",
       "1   [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "2   [{\"mappings\":[{\"start\":{\"residue_number\":3},\"e...   \n",
       "3   [{\"mappings\":[{\"start\":{\"residue_number\":3},\"e...   \n",
       "4   [{\"mappings\":[{\"start\":{\"residue_number\":3},\"e...   \n",
       "5   [{\"mappings\":[{\"start\":{\"residue_number\":3},\"e...   \n",
       "6   [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "7   [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "8   [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "9   [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "10  [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "11  [{\"mappings\":[{\"start\":{\"residue_number\":7},\"e...   \n",
       "12  [{\"mappings\":[{\"start\":{\"residue_number\":7},\"e...   \n",
       "13  [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "14  [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "15  [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "16  [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "17  [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "18  [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "19  [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "20  [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "21  [{\"mappings\":[{\"start\":{\"residue_number\":5},\"e...   \n",
       "22  [{\"mappings\":[{\"start\":{\"residue_number\":1},\"e...   \n",
       "23                                                 []   \n",
       "24                                                 []   \n",
       "\n",
       "                                              synonym     weight  \n",
       "0                      Partner and localizer of BRCA2  39195.793  \n",
       "1   19meric peptide from Breast cancer type 2 susc...   2107.297  \n",
       "2                    Three prime repair exonuclease 2  26224.346  \n",
       "3                    Three prime repair exonuclease 2  26224.346  \n",
       "4                    Three prime repair exonuclease 2  26224.346  \n",
       "5                    Three prime repair exonuclease 2  26224.346  \n",
       "6                  DNA repair protein RAD51 homolog 1  26855.346  \n",
       "7                  DNA repair protein RAD51 homolog 1  26855.346  \n",
       "8                  DNA repair protein RAD51 homolog 1  26855.346  \n",
       "9                  DNA repair protein RAD51 homolog 1  26855.346  \n",
       "10        Breast cancer type 2 susceptibility protein   3966.598  \n",
       "11               Serine/threonine-protein kinase PLK1  28193.098  \n",
       "12               Serine/threonine-protein kinase PLK1  28193.098  \n",
       "13                            Phosphopeptide of BRCA2   1839.975  \n",
       "14                            Phosphopeptide of BRCA2   1839.975  \n",
       "15                Breast cancer type 2 susceptibility   3980.434  \n",
       "16                Breast cancer type 2 susceptibility   3980.434  \n",
       "17                Breast cancer type 2 susceptibility   3980.434  \n",
       "18                Breast cancer type 2 susceptibility   3980.434  \n",
       "19                Breast cancer type 2 susceptibility   3980.434  \n",
       "20                Breast cancer type 2 susceptibility   3980.434  \n",
       "21                Breast cancer type 2 susceptibility   1531.710  \n",
       "22          DNA repair and recombination protein RadA  25542.064  \n",
       "23                                     peptide linker   1234.091  \n",
       "24                 ARTIFICIAL GLY-SER-MSE-GLY PEPTIDE    397.287  \n",
       "\n",
       "[25 rows x 39 columns]"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(dfrm, dfrm3[dfrm3.molecule_type.isin(['polypeptide(L)', 'polypeptide(D)'])], how='right')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.51s/it]\n",
      "2020-02-20 18:58:17,918 ProcessEntryData INFO 1 ids downloaded in 1.51s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>chain_id</th>\n",
       "      <th>number_residues</th>\n",
       "      <th>observed_ratio</th>\n",
       "      <th>partial_ratio</th>\n",
       "      <th>struct_asym_id</th>\n",
       "      <th>entity_id</th>\n",
       "      <th>pdb_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>356</td>\n",
       "      <td>0.879</td>\n",
       "      <td>0.053</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>X</td>\n",
       "      <td>19</td>\n",
       "      <td>0.736</td>\n",
       "      <td>0.105</td>\n",
       "      <td>B</td>\n",
       "      <td>2</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>B</td>\n",
       "      <td>231</td>\n",
       "      <td>0.948</td>\n",
       "      <td>0.000</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>E</td>\n",
       "      <td>231</td>\n",
       "      <td>0.917</td>\n",
       "      <td>0.000</td>\n",
       "      <td>E</td>\n",
       "      <td>1</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>C</td>\n",
       "      <td>231</td>\n",
       "      <td>0.909</td>\n",
       "      <td>0.000</td>\n",
       "      <td>C</td>\n",
       "      <td>1</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>A</td>\n",
       "      <td>231</td>\n",
       "      <td>0.900</td>\n",
       "      <td>0.000</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>D</td>\n",
       "      <td>231</td>\n",
       "      <td>0.900</td>\n",
       "      <td>0.000</td>\n",
       "      <td>D</td>\n",
       "      <td>1</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>F</td>\n",
       "      <td>231</td>\n",
       "      <td>0.883</td>\n",
       "      <td>0.000</td>\n",
       "      <td>F</td>\n",
       "      <td>1</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>G</td>\n",
       "      <td>231</td>\n",
       "      <td>0.839</td>\n",
       "      <td>0.000</td>\n",
       "      <td>G</td>\n",
       "      <td>1</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>H</td>\n",
       "      <td>231</td>\n",
       "      <td>0.831</td>\n",
       "      <td>0.069</td>\n",
       "      <td>H</td>\n",
       "      <td>1</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>I</td>\n",
       "      <td>38</td>\n",
       "      <td>0.815</td>\n",
       "      <td>0.026</td>\n",
       "      <td>I</td>\n",
       "      <td>2</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>J</td>\n",
       "      <td>38</td>\n",
       "      <td>0.815</td>\n",
       "      <td>0.026</td>\n",
       "      <td>J</td>\n",
       "      <td>2</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>L</td>\n",
       "      <td>38</td>\n",
       "      <td>0.815</td>\n",
       "      <td>0.026</td>\n",
       "      <td>L</td>\n",
       "      <td>2</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>M</td>\n",
       "      <td>38</td>\n",
       "      <td>0.789</td>\n",
       "      <td>0.026</td>\n",
       "      <td>M</td>\n",
       "      <td>2</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>K</td>\n",
       "      <td>38</td>\n",
       "      <td>0.473</td>\n",
       "      <td>0.026</td>\n",
       "      <td>K</td>\n",
       "      <td>2</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>N</td>\n",
       "      <td>38</td>\n",
       "      <td>0.315</td>\n",
       "      <td>0.026</td>\n",
       "      <td>N</td>\n",
       "      <td>2</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>O</td>\n",
       "      <td>15</td>\n",
       "      <td>1.000</td>\n",
       "      <td>0.066</td>\n",
       "      <td>O</td>\n",
       "      <td>3</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>A</td>\n",
       "      <td>245</td>\n",
       "      <td>0.914</td>\n",
       "      <td>0.000</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>6gy2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>B</td>\n",
       "      <td>245</td>\n",
       "      <td>0.914</td>\n",
       "      <td>0.000</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>6gy2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>C</td>\n",
       "      <td>17</td>\n",
       "      <td>0.705</td>\n",
       "      <td>0.058</td>\n",
       "      <td>C</td>\n",
       "      <td>2</td>\n",
       "      <td>6gy2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>D</td>\n",
       "      <td>17</td>\n",
       "      <td>0.705</td>\n",
       "      <td>0.058</td>\n",
       "      <td>D</td>\n",
       "      <td>2</td>\n",
       "      <td>6gy2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>A</td>\n",
       "      <td>238</td>\n",
       "      <td>0.899</td>\n",
       "      <td>0.012</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1y97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>B</td>\n",
       "      <td>238</td>\n",
       "      <td>0.886</td>\n",
       "      <td>0.012</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>1y97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>A</td>\n",
       "      <td>243</td>\n",
       "      <td>0.864</td>\n",
       "      <td>0.000</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>B</td>\n",
       "      <td>35</td>\n",
       "      <td>0.942</td>\n",
       "      <td>0.028</td>\n",
       "      <td>B</td>\n",
       "      <td>2</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>L</td>\n",
       "      <td>14</td>\n",
       "      <td>0.214</td>\n",
       "      <td>0.000</td>\n",
       "      <td>C</td>\n",
       "      <td>3</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>C</td>\n",
       "      <td>4</td>\n",
       "      <td>1.000</td>\n",
       "      <td>0.250</td>\n",
       "      <td>D</td>\n",
       "      <td>4</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   chain_id  number_residues  observed_ratio  partial_ratio struct_asym_id  \\\n",
       "0         A              356           0.879          0.053              A   \n",
       "1         X               19           0.736          0.105              B   \n",
       "2         B              231           0.948          0.000              B   \n",
       "3         E              231           0.917          0.000              E   \n",
       "4         C              231           0.909          0.000              C   \n",
       "5         A              231           0.900          0.000              A   \n",
       "6         D              231           0.900          0.000              D   \n",
       "7         F              231           0.883          0.000              F   \n",
       "8         G              231           0.839          0.000              G   \n",
       "9         H              231           0.831          0.069              H   \n",
       "10        I               38           0.815          0.026              I   \n",
       "11        J               38           0.815          0.026              J   \n",
       "12        L               38           0.815          0.026              L   \n",
       "13        M               38           0.789          0.026              M   \n",
       "14        K               38           0.473          0.026              K   \n",
       "15        N               38           0.315          0.026              N   \n",
       "16        O               15           1.000          0.066              O   \n",
       "17        A              245           0.914          0.000              A   \n",
       "18        B              245           0.914          0.000              B   \n",
       "19        C               17           0.705          0.058              C   \n",
       "20        D               17           0.705          0.058              D   \n",
       "21        A              238           0.899          0.012              A   \n",
       "22        B              238           0.886          0.012              B   \n",
       "23        A              243           0.864          0.000              A   \n",
       "24        B               35           0.942          0.028              B   \n",
       "25        L               14           0.214          0.000              C   \n",
       "26        C                4           1.000          0.250              D   \n",
       "\n",
       "    entity_id pdb_id  \n",
       "0           1   3eu7  \n",
       "1           2   3eu7  \n",
       "2           1   6hqu  \n",
       "3           1   6hqu  \n",
       "4           1   6hqu  \n",
       "5           1   6hqu  \n",
       "6           1   6hqu  \n",
       "7           1   6hqu  \n",
       "8           1   6hqu  \n",
       "9           1   6hqu  \n",
       "10          2   6hqu  \n",
       "11          2   6hqu  \n",
       "12          2   6hqu  \n",
       "13          2   6hqu  \n",
       "14          2   6hqu  \n",
       "15          2   6hqu  \n",
       "16          3   6hqu  \n",
       "17          1   6gy2  \n",
       "18          1   6gy2  \n",
       "19          2   6gy2  \n",
       "20          2   6gy2  \n",
       "21          1   1y97  \n",
       "22          1   1y97  \n",
       "23          1   1n0w  \n",
       "24          2   1n0w  \n",
       "25          3   1n0w  \n",
       "26          4   1n0w  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Request the 'pdb/entry/observed_residues_ratio/' data through ProcessEntryData (method='post').\n",
    "# The filters presumably keep rows where sifts_range_tage == 'Safe' and delete != True;\n",
    "# confirm the exact semantics against ProcessEntryData.main.\n",
    "dfrm4 = ProcessEntryData.main(\n",
    "    dfrm=dfrm,\n",
    "    pdb_col='pdb_id',\n",
    "    suffix='pdb/entry/observed_residues_ratio/',\n",
    "    method='post',\n",
    "    filters={\n",
    "        'sifts_range_tage': ('eq', 'Safe'),\n",
    "        'delete': ('ne', True),\n",
    "    },\n",
    "    folder=r'C:\GitWorks\Muta3DMaps\Muta3DMaps\test\data'\n",
    ")\n",
    "# Trailing expression: display the resulting frame.\n",
    "dfrm4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.54s/it]\n",
      "2020-02-19 21:02:19,368 ProcessEntryData INFO 1 ids downloaded in 3.56s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>end</th>\n",
       "      <th>start</th>\n",
       "      <th>chain_id</th>\n",
       "      <th>struct_asym_id</th>\n",
       "      <th>entity_id</th>\n",
       "      <th>pdb_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>{\"author_residue_number\":878,\"author_insertion...</td>\n",
       "      <td>{\"author_residue_number\":854,\"author_insertion...</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>{\"author_residue_number\":949,\"author_insertion...</td>\n",
       "      <td>{\"author_residue_number\":881,\"author_insertion...</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>{\"author_residue_number\":995,\"author_insertion...</td>\n",
       "      <td>{\"author_residue_number\":955,\"author_insertion...</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>{\"author_residue_number\":1076,\"author_insertio...</td>\n",
       "      <td>{\"author_residue_number\":998,\"author_insertion...</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>{\"author_residue_number\":1186,\"author_insertio...</td>\n",
       "      <td>{\"author_residue_number\":1088,\"author_insertio...</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>66</th>\n",
       "      <td>{\"author_residue_number\":185,\"author_insertion...</td>\n",
       "      <td>{\"author_residue_number\":169,\"author_insertion...</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1y97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>{\"author_residue_number\":228,\"author_insertion...</td>\n",
       "      <td>{\"author_residue_number\":189,\"author_insertion...</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1y97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>68</th>\n",
       "      <td>{\"author_residue_number\":156,\"author_insertion...</td>\n",
       "      <td>{\"author_residue_number\":3,\"author_insertion_c...</td>\n",
       "      <td>B</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>1y97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>69</th>\n",
       "      <td>{\"author_residue_number\":184,\"author_insertion...</td>\n",
       "      <td>{\"author_residue_number\":169,\"author_insertion...</td>\n",
       "      <td>B</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>1y97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70</th>\n",
       "      <td>{\"author_residue_number\":228,\"author_insertion...</td>\n",
       "      <td>{\"author_residue_number\":188,\"author_insertion...</td>\n",
       "      <td>B</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>1y97</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>71 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                  end  \\\n",
       "0   {\"author_residue_number\":878,\"author_insertion...   \n",
       "1   {\"author_residue_number\":949,\"author_insertion...   \n",
       "2   {\"author_residue_number\":995,\"author_insertion...   \n",
       "3   {\"author_residue_number\":1076,\"author_insertio...   \n",
       "4   {\"author_residue_number\":1186,\"author_insertio...   \n",
       "..                                                ...   \n",
       "66  {\"author_residue_number\":185,\"author_insertion...   \n",
       "67  {\"author_residue_number\":228,\"author_insertion...   \n",
       "68  {\"author_residue_number\":156,\"author_insertion...   \n",
       "69  {\"author_residue_number\":184,\"author_insertion...   \n",
       "70  {\"author_residue_number\":228,\"author_insertion...   \n",
       "\n",
       "                                                start chain_id struct_asym_id  \\\n",
       "0   {\"author_residue_number\":854,\"author_insertion...        A              A   \n",
       "1   {\"author_residue_number\":881,\"author_insertion...        A              A   \n",
       "2   {\"author_residue_number\":955,\"author_insertion...        A              A   \n",
       "3   {\"author_residue_number\":998,\"author_insertion...        A              A   \n",
       "4   {\"author_residue_number\":1088,\"author_insertio...        A              A   \n",
       "..                                                ...      ...            ...   \n",
       "66  {\"author_residue_number\":169,\"author_insertion...        A              A   \n",
       "67  {\"author_residue_number\":189,\"author_insertion...        A              A   \n",
       "68  {\"author_residue_number\":3,\"author_insertion_c...        B              B   \n",
       "69  {\"author_residue_number\":169,\"author_insertion...        B              B   \n",
       "70  {\"author_residue_number\":188,\"author_insertion...        B              B   \n",
       "\n",
       "    entity_id pdb_id  \n",
       "0           1   3eu7  \n",
       "1           1   3eu7  \n",
       "2           1   3eu7  \n",
       "3           1   3eu7  \n",
       "4           1   3eu7  \n",
       "..        ...    ...  \n",
       "66          1   1y97  \n",
       "67          1   1y97  \n",
       "68          1   1y97  \n",
       "69          1   1y97  \n",
       "70          1   1y97  \n",
       "\n",
       "[71 rows x 6 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Request the 'pdb/entry/polymer_coverage/' data through ProcessEntryData (method='post').\n",
    "# The filters presumably keep rows where sifts_range_tage == 'Safe' and delete != True;\n",
    "# confirm the exact semantics against ProcessEntryData.main.\n",
    "dfrm5 = ProcessEntryData.main(\n",
    "    dfrm=dfrm,\n",
    "    pdb_col='pdb_id',\n",
    "    suffix='pdb/entry/polymer_coverage/',\n",
    "    method='post',\n",
    "    filters={\n",
    "        'sifts_range_tage': ('eq', 'Safe'),\n",
    "        'delete': ('ne', True),\n",
    "    },\n",
    "    folder=r'C:\GitWorks\Muta3DMaps\Muta3DMaps\test\data'\n",
    ")\n",
    "# Trailing expression: display the resulting frame.\n",
    "dfrm5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7a329dc3516744ad83eefff99eb679bb",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "A Jupyter Widget"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Build an nglview widget for PDB entry 3eu7; the trailing expression displays it.\n",
    "view = nglview.show_pdbid('3eu7')\n",
    "view"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:01<00:00,  2.97it/s]\n",
      "2020-02-22 11:57:34,286 ProcessEntryData INFO 5 ids downloaded in 1.68s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>author_insertion_code</th>\n",
       "      <th>author_residue_number</th>\n",
       "      <th>multiple_conformers</th>\n",
       "      <th>observed_ratio</th>\n",
       "      <th>residue_name</th>\n",
       "      <th>residue_number</th>\n",
       "      <th>chain_id</th>\n",
       "      <th>struct_asym_id</th>\n",
       "      <th>entity_id</th>\n",
       "      <th>pdb_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td></td>\n",
       "      <td>97</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>SER</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td></td>\n",
       "      <td>98</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>GLU</td>\n",
       "      <td>2</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td>99</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>ILE</td>\n",
       "      <td>3</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td></td>\n",
       "      <td>100</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>ILE</td>\n",
       "      <td>4</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td></td>\n",
       "      <td>101</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>GLN</td>\n",
       "      <td>5</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3783</th>\n",
       "      <td></td>\n",
       "      <td>402</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>MG</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>Q</td>\n",
       "      <td>5</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3784</th>\n",
       "      <td></td>\n",
       "      <td>402</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>MG</td>\n",
       "      <td>1</td>\n",
       "      <td>B</td>\n",
       "      <td>S</td>\n",
       "      <td>5</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3785</th>\n",
       "      <td></td>\n",
       "      <td>402</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>MG</td>\n",
       "      <td>1</td>\n",
       "      <td>C</td>\n",
       "      <td>U</td>\n",
       "      <td>5</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3786</th>\n",
       "      <td></td>\n",
       "      <td>402</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>MG</td>\n",
       "      <td>1</td>\n",
       "      <td>D</td>\n",
       "      <td>W</td>\n",
       "      <td>5</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3787</th>\n",
       "      <td></td>\n",
       "      <td>402</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>MG</td>\n",
       "      <td>1</td>\n",
       "      <td>E</td>\n",
       "      <td>Y</td>\n",
       "      <td>5</td>\n",
       "      <td>6hqu</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3788 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     author_insertion_code  author_residue_number multiple_conformers  \\\n",
       "0                                              97                 NaN   \n",
       "1                                              98                 NaN   \n",
       "2                                              99                 NaN   \n",
       "3                                             100                 NaN   \n",
       "4                                             101                 NaN   \n",
       "...                    ...                    ...                 ...   \n",
       "3783                                          402                 NaN   \n",
       "3784                                          402                 NaN   \n",
       "3785                                          402                 NaN   \n",
       "3786                                          402                 NaN   \n",
       "3787                                          402                 NaN   \n",
       "\n",
       "      observed_ratio residue_name  residue_number chain_id struct_asym_id  \\\n",
       "0                0.0          SER               1        A              A   \n",
       "1                1.0          GLU               2        A              A   \n",
       "2                1.0          ILE               3        A              A   \n",
       "3                1.0          ILE               4        A              A   \n",
       "4                1.0          GLN               5        A              A   \n",
       "...              ...          ...             ...      ...            ...   \n",
       "3783             1.0           MG               1        A              Q   \n",
       "3784             1.0           MG               1        B              S   \n",
       "3785             1.0           MG               1        C              U   \n",
       "3786             1.0           MG               1        D              W   \n",
       "3787             1.0           MG               1        E              Y   \n",
       "\n",
       "      entity_id pdb_id  \n",
       "0             1   1n0w  \n",
       "1             1   1n0w  \n",
       "2             1   1n0w  \n",
       "3             1   1n0w  \n",
       "4             1   1n0w  \n",
       "...         ...    ...  \n",
       "3783          5   6hqu  \n",
       "3784          5   6hqu  \n",
       "3785          5   6hqu  \n",
       "3786          5   6hqu  \n",
       "3787          5   6hqu  \n",
       "\n",
       "[3788 rows x 10 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Request the 'pdb/entry/residue_listing/' data through ProcessEntryData (method='get').\n",
    "# The filters presumably keep rows where sifts_range_tage == 'Safe' and delete != True;\n",
    "# confirm the exact semantics against ProcessEntryData.main.\n",
    "dfrm6 = ProcessEntryData.main(\n",
    "    dfrm=dfrm,\n",
    "    pdb_col='pdb_id',\n",
    "    suffix='pdb/entry/residue_listing/',\n",
    "    method='get',\n",
    "    filters={\n",
    "        'sifts_range_tage': ('eq', 'Safe'),\n",
    "        'delete': ('ne', True),\n",
    "    },\n",
    "    folder=r'C:\GitWorks\Muta3DMaps\Muta3DMaps\test\data'\n",
    ")\n",
    "# Trailing expression: display the resulting frame.\n",
    "dfrm6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0       1\n",
       "1       1\n",
       "2       1\n",
       "3       1\n",
       "4       1\n",
       "       ..\n",
       "3783    5\n",
       "3784    5\n",
       "3785    5\n",
       "3786    5\n",
       "3787    5\n",
       "Name: entity_id, Length: 3788, dtype: int64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the entity_id column of the residue listing frame.\n",
    "dfrm6['entity_id']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>author_insertion_code</th>\n",
       "      <th>author_residue_number</th>\n",
       "      <th>multiple_conformers</th>\n",
       "      <th>observed_ratio</th>\n",
       "      <th>residue_name</th>\n",
       "      <th>residue_number</th>\n",
       "      <th>chain_id</th>\n",
       "      <th>struct_asym_id</th>\n",
       "      <th>entity_id</th>\n",
       "      <th>pdb_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>SER</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>133</th>\n",
       "      <td>NaN</td>\n",
       "      <td>230</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>THR</td>\n",
       "      <td>134</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>134</th>\n",
       "      <td>NaN</td>\n",
       "      <td>231</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>ASP</td>\n",
       "      <td>135</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>NaN</td>\n",
       "      <td>232</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>TYR</td>\n",
       "      <td>136</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136</th>\n",
       "      <td>NaN</td>\n",
       "      <td>233</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>SER</td>\n",
       "      <td>137</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>137</th>\n",
       "      <td>NaN</td>\n",
       "      <td>234</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>GLY</td>\n",
       "      <td>138</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>138</th>\n",
       "      <td>NaN</td>\n",
       "      <td>235</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>ARG</td>\n",
       "      <td>139</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>139</th>\n",
       "      <td>NaN</td>\n",
       "      <td>236</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>GLY</td>\n",
       "      <td>140</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>171</th>\n",
       "      <td>NaN</td>\n",
       "      <td>268</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>GLN</td>\n",
       "      <td>172</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>172</th>\n",
       "      <td>NaN</td>\n",
       "      <td>269</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>VAL</td>\n",
       "      <td>173</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>173</th>\n",
       "      <td>NaN</td>\n",
       "      <td>270</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>VAL</td>\n",
       "      <td>174</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>174</th>\n",
       "      <td>NaN</td>\n",
       "      <td>271</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>ALA</td>\n",
       "      <td>175</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>175</th>\n",
       "      <td>NaN</td>\n",
       "      <td>272</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>GLN</td>\n",
       "      <td>176</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>176</th>\n",
       "      <td>NaN</td>\n",
       "      <td>273</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>VAL</td>\n",
       "      <td>177</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>177</th>\n",
       "      <td>NaN</td>\n",
       "      <td>274</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>ASP</td>\n",
       "      <td>178</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178</th>\n",
       "      <td>NaN</td>\n",
       "      <td>275</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>GLY</td>\n",
       "      <td>179</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>179</th>\n",
       "      <td>NaN</td>\n",
       "      <td>276</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>ALA</td>\n",
       "      <td>180</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>180</th>\n",
       "      <td>NaN</td>\n",
       "      <td>277</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>ALA</td>\n",
       "      <td>181</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>181</th>\n",
       "      <td>NaN</td>\n",
       "      <td>278</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>182</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>182</th>\n",
       "      <td>NaN</td>\n",
       "      <td>279</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>PHE</td>\n",
       "      <td>183</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>183</th>\n",
       "      <td>NaN</td>\n",
       "      <td>280</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>ALA</td>\n",
       "      <td>184</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>184</th>\n",
       "      <td>NaN</td>\n",
       "      <td>281</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>ALA</td>\n",
       "      <td>185</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>185</th>\n",
       "      <td>NaN</td>\n",
       "      <td>282</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>ASP</td>\n",
       "      <td>186</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>186</th>\n",
       "      <td>NaN</td>\n",
       "      <td>283</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>PRO</td>\n",
       "      <td>187</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>187</th>\n",
       "      <td>NaN</td>\n",
       "      <td>284</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>LYS</td>\n",
       "      <td>188</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>188</th>\n",
       "      <td>NaN</td>\n",
       "      <td>285</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>LYS</td>\n",
       "      <td>189</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>189</th>\n",
       "      <td>NaN</td>\n",
       "      <td>286</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>PRO</td>\n",
       "      <td>190</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>190</th>\n",
       "      <td>NaN</td>\n",
       "      <td>287</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>ILE</td>\n",
       "      <td>191</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>191</th>\n",
       "      <td>NaN</td>\n",
       "      <td>288</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>GLY</td>\n",
       "      <td>192</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>192</th>\n",
       "      <td>NaN</td>\n",
       "      <td>289</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>GLY</td>\n",
       "      <td>193</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>193</th>\n",
       "      <td>NaN</td>\n",
       "      <td>290</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>ASN</td>\n",
       "      <td>194</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>194</th>\n",
       "      <td>NaN</td>\n",
       "      <td>291</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>ILE</td>\n",
       "      <td>195</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>195</th>\n",
       "      <td>NaN</td>\n",
       "      <td>292</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>ILE</td>\n",
       "      <td>196</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     author_insertion_code  author_residue_number multiple_conformers  \\\n",
       "0                      NaN                     97                 NaN   \n",
       "133                    NaN                    230                 NaN   \n",
       "134                    NaN                    231                 NaN   \n",
       "135                    NaN                    232                 NaN   \n",
       "136                    NaN                    233                 NaN   \n",
       "137                    NaN                    234                 NaN   \n",
       "138                    NaN                    235                 NaN   \n",
       "139                    NaN                    236                 NaN   \n",
       "171                    NaN                    268                 NaN   \n",
       "172                    NaN                    269                 NaN   \n",
       "173                    NaN                    270                 NaN   \n",
       "174                    NaN                    271                 NaN   \n",
       "175                    NaN                    272                 NaN   \n",
       "176                    NaN                    273                 NaN   \n",
       "177                    NaN                    274                 NaN   \n",
       "178                    NaN                    275                 NaN   \n",
       "179                    NaN                    276                 NaN   \n",
       "180                    NaN                    277                 NaN   \n",
       "181                    NaN                    278                 NaN   \n",
       "182                    NaN                    279                 NaN   \n",
       "183                    NaN                    280                 NaN   \n",
       "184                    NaN                    281                 NaN   \n",
       "185                    NaN                    282                 NaN   \n",
       "186                    NaN                    283                 NaN   \n",
       "187                    NaN                    284                 NaN   \n",
       "188                    NaN                    285                 NaN   \n",
       "189                    NaN                    286                 NaN   \n",
       "190                    NaN                    287                 NaN   \n",
       "191                    NaN                    288                 NaN   \n",
       "192                    NaN                    289                 NaN   \n",
       "193                    NaN                    290                 NaN   \n",
       "194                    NaN                    291                 NaN   \n",
       "195                    NaN                    292                 NaN   \n",
       "\n",
       "     observed_ratio residue_name  residue_number chain_id struct_asym_id  \\\n",
       "0               0.0          SER               1        A              A   \n",
       "133             0.0          THR             134        A              A   \n",
       "134             0.0          ASP             135        A              A   \n",
       "135             0.0          TYR             136        A              A   \n",
       "136             0.0          SER             137        A              A   \n",
       "137             0.0          GLY             138        A              A   \n",
       "138             0.0          ARG             139        A              A   \n",
       "139             0.0          GLY             140        A              A   \n",
       "171             0.0          GLN             172        A              A   \n",
       "172             0.0          VAL             173        A              A   \n",
       "173             0.0          VAL             174        A              A   \n",
       "174             0.0          ALA             175        A              A   \n",
       "175             0.0          GLN             176        A              A   \n",
       "176             0.0          VAL             177        A              A   \n",
       "177             0.0          ASP             178        A              A   \n",
       "178             0.0          GLY             179        A              A   \n",
       "179             0.0          ALA             180        A              A   \n",
       "180             0.0          ALA             181        A              A   \n",
       "181             0.0          MSE             182        A              A   \n",
       "182             0.0          PHE             183        A              A   \n",
       "183             0.0          ALA             184        A              A   \n",
       "184             0.0          ALA             185        A              A   \n",
       "185             0.0          ASP             186        A              A   \n",
       "186             0.0          PRO             187        A              A   \n",
       "187             0.0          LYS             188        A              A   \n",
       "188             0.0          LYS             189        A              A   \n",
       "189             0.0          PRO             190        A              A   \n",
       "190             0.0          ILE             191        A              A   \n",
       "191             0.0          GLY             192        A              A   \n",
       "192             0.0          GLY             193        A              A   \n",
       "193             0.0          ASN             194        A              A   \n",
       "194             0.0          ILE             195        A              A   \n",
       "195             0.0          ILE             196        A              A   \n",
       "\n",
       "     entity_id pdb_id  \n",
       "0            1   1n0w  \n",
       "133          1   1n0w  \n",
       "134          1   1n0w  \n",
       "135          1   1n0w  \n",
       "136          1   1n0w  \n",
       "137          1   1n0w  \n",
       "138          1   1n0w  \n",
       "139          1   1n0w  \n",
       "171          1   1n0w  \n",
       "172          1   1n0w  \n",
       "173          1   1n0w  \n",
       "174          1   1n0w  \n",
       "175          1   1n0w  \n",
       "176          1   1n0w  \n",
       "177          1   1n0w  \n",
       "178          1   1n0w  \n",
       "179          1   1n0w  \n",
       "180          1   1n0w  \n",
       "181          1   1n0w  \n",
       "182          1   1n0w  \n",
       "183          1   1n0w  \n",
       "184          1   1n0w  \n",
       "185          1   1n0w  \n",
       "186          1   1n0w  \n",
       "187          1   1n0w  \n",
       "188          1   1n0w  \n",
       "189          1   1n0w  \n",
       "190          1   1n0w  \n",
       "191          1   1n0w  \n",
       "192          1   1n0w  \n",
       "193          1   1n0w  \n",
       "194          1   1n0w  \n",
       "195          1   1n0w  "
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dfrm6[(dfrm6.pdb_id.eq('1n0w')) & (dfrm6.chain_id.eq('A')) & (dfrm6.entity_id.eq(1)) & (dfrm6.observed_ratio.eq(0))]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "43\n"
     ]
    }
   ],
   "source": [
    "print(len(dfrm6[\n",
    "    (dfrm6.pdb_id.eq('3eu7')) & \n",
    "    (dfrm6.observed_ratio.eq(0)) & \n",
    "    (dfrm6.chain_id.eq('A')) &\n",
    "    (dfrm6.entity_id.eq(1))\n",
    "]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Q?\n",
    "```warning\n",
    "|dict,6gy2,array,1,dict,pdb_sequence_indices_with_multiple_residues,dict,14|dict,6gy2,array,1,dict,pdb_sequence_indices_with_multiple_residues,dict,14,dict,three_letter_code|dict,6gy2,array,1,dict,pdb_sequence_indices_with_multiple_residues,dict,14,dict,parent_chem_comp_ids|dict,6gy2,array,1,dict,pdb_sequence_indices_with_multiple_residues,dict,14,dict,one_letter_code\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.51s/it]\n",
      "2020-02-22 11:57:40,833 ProcessEntryData INFO 1 ids downloaded in 1.52s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>alternate_conformers</th>\n",
       "      <th>author_insertion_code</th>\n",
       "      <th>author_residue_number</th>\n",
       "      <th>chain_id</th>\n",
       "      <th>chem_comp_id</th>\n",
       "      <th>chem_comp_name</th>\n",
       "      <th>entity_id</th>\n",
       "      <th>residue_number</th>\n",
       "      <th>struct_asym_id</th>\n",
       "      <th>pdb_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "      <td>1127</td>\n",
       "      <td>A</td>\n",
       "      <td>CSD</td>\n",
       "      <td>3-SULFINOALANINE</td>\n",
       "      <td>1</td>\n",
       "      <td>297</td>\n",
       "      <td>A</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "      <td>207</td>\n",
       "      <td>C</td>\n",
       "      <td>TPO</td>\n",
       "      <td>PHOSPHOTHREONINE</td>\n",
       "      <td>2</td>\n",
       "      <td>14</td>\n",
       "      <td>C</td>\n",
       "      <td>6gy2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "      <td>207</td>\n",
       "      <td>D</td>\n",
       "      <td>TPO</td>\n",
       "      <td>PHOSPHOTHREONINE</td>\n",
       "      <td>2</td>\n",
       "      <td>14</td>\n",
       "      <td>D</td>\n",
       "      <td>6gy2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>A</td>\n",
       "      <td>1y97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "      <td>64</td>\n",
       "      <td>A</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "      <td>1</td>\n",
       "      <td>66</td>\n",
       "      <td>A</td>\n",
       "      <td>1y97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "      <td>225</td>\n",
       "      <td>A</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "      <td>1</td>\n",
       "      <td>227</td>\n",
       "      <td>A</td>\n",
       "      <td>1y97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "      <td>1</td>\n",
       "      <td>B</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>B</td>\n",
       "      <td>1y97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "      <td>64</td>\n",
       "      <td>B</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "      <td>1</td>\n",
       "      <td>66</td>\n",
       "      <td>B</td>\n",
       "      <td>1y97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "      <td>225</td>\n",
       "      <td>B</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "      <td>1</td>\n",
       "      <td>227</td>\n",
       "      <td>B</td>\n",
       "      <td>1y97</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "      <td>125</td>\n",
       "      <td>A</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "      <td>1</td>\n",
       "      <td>29</td>\n",
       "      <td>A</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>2</td>\n",
       "      <td></td>\n",
       "      <td>158</td>\n",
       "      <td>A</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "      <td>1</td>\n",
       "      <td>62</td>\n",
       "      <td>A</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "      <td>210</td>\n",
       "      <td>A</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "      <td>1</td>\n",
       "      <td>114</td>\n",
       "      <td>A</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "      <td>211</td>\n",
       "      <td>A</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "      <td>1</td>\n",
       "      <td>115</td>\n",
       "      <td>A</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "      <td>243</td>\n",
       "      <td>A</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "      <td>1</td>\n",
       "      <td>147</td>\n",
       "      <td>A</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "      <td>251</td>\n",
       "      <td>A</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "      <td>1</td>\n",
       "      <td>155</td>\n",
       "      <td>A</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "      <td>278</td>\n",
       "      <td>A</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "      <td>1</td>\n",
       "      <td>182</td>\n",
       "      <td>A</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>2</td>\n",
       "      <td></td>\n",
       "      <td>326</td>\n",
       "      <td>A</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "      <td>1</td>\n",
       "      <td>230</td>\n",
       "      <td>A</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "      <td>1555</td>\n",
       "      <td>L</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "      <td>3</td>\n",
       "      <td>13</td>\n",
       "      <td>C</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>0</td>\n",
       "      <td></td>\n",
       "      <td>3</td>\n",
       "      <td>C</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>D</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    alternate_conformers author_insertion_code  author_residue_number  \\\n",
       "0                      0                                         1127   \n",
       "1                      0                                          207   \n",
       "2                      0                                          207   \n",
       "3                      0                                            1   \n",
       "4                      0                                           64   \n",
       "5                      0                                          225   \n",
       "6                      0                                            1   \n",
       "7                      0                                           64   \n",
       "8                      0                                          225   \n",
       "9                      0                                          125   \n",
       "10                     2                                          158   \n",
       "11                     0                                          210   \n",
       "12                     0                                          211   \n",
       "13                     0                                          243   \n",
       "14                     0                                          251   \n",
       "15                     0                                          278   \n",
       "16                     2                                          326   \n",
       "17                     0                                         1555   \n",
       "18                     0                                            3   \n",
       "\n",
       "   chain_id chem_comp_id    chem_comp_name  entity_id  residue_number  \\\n",
       "0         A          CSD  3-SULFINOALANINE          1             297   \n",
       "1         C          TPO  PHOSPHOTHREONINE          2              14   \n",
       "2         D          TPO  PHOSPHOTHREONINE          2              14   \n",
       "3         A          MSE  SELENOMETHIONINE          1               3   \n",
       "4         A          MSE  SELENOMETHIONINE          1              66   \n",
       "5         A          MSE  SELENOMETHIONINE          1             227   \n",
       "6         B          MSE  SELENOMETHIONINE          1               3   \n",
       "7         B          MSE  SELENOMETHIONINE          1              66   \n",
       "8         B          MSE  SELENOMETHIONINE          1             227   \n",
       "9         A          MSE  SELENOMETHIONINE          1              29   \n",
       "10        A          MSE  SELENOMETHIONINE          1              62   \n",
       "11        A          MSE  SELENOMETHIONINE          1             114   \n",
       "12        A          MSE  SELENOMETHIONINE          1             115   \n",
       "13        A          MSE  SELENOMETHIONINE          1             147   \n",
       "14        A          MSE  SELENOMETHIONINE          1             155   \n",
       "15        A          MSE  SELENOMETHIONINE          1             182   \n",
       "16        A          MSE  SELENOMETHIONINE          1             230   \n",
       "17        L          MSE  SELENOMETHIONINE          3              13   \n",
       "18        C          MSE  SELENOMETHIONINE          4               3   \n",
       "\n",
       "   struct_asym_id pdb_id  \n",
       "0               A   3eu7  \n",
       "1               C   6gy2  \n",
       "2               D   6gy2  \n",
       "3               A   1y97  \n",
       "4               A   1y97  \n",
       "5               A   1y97  \n",
       "6               B   1y97  \n",
       "7               B   1y97  \n",
       "8               B   1y97  \n",
       "9               A   1n0w  \n",
       "10              A   1n0w  \n",
       "11              A   1n0w  \n",
       "12              A   1n0w  \n",
       "13              A   1n0w  \n",
       "14              A   1n0w  \n",
       "15              A   1n0w  \n",
       "16              A   1n0w  \n",
       "17              C   1n0w  \n",
       "18              D   1n0w  "
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dfrm7 = ProcessEntryData.main(\n",
    "    pdb_col='pdb_id',\n",
    "    filters={'sifts_range_tage': ('eq', 'Safe'), 'delete': ('ne', True)},\n",
    "    dfrm=dfrm,\n",
    "    suffix='pdb/entry/modified_AA_or_NA/',\n",
    "    method='post',\n",
    "    folder=r'C:\\GitWorks\\Muta3DMaps\\Muta3DMaps\\test\\data'\n",
    ")\n",
    "dfrm7"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Counter({'3eu7': 1, '6gy2': 2, '1y97': 6, '1n0w': 10})"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Counter(dfrm7.pdb_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 231,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      NaN\n",
       "1      NaN\n",
       "2      NaN\n",
       "3      NaN\n",
       "4      NaN\n",
       "        ..\n",
       "3783   NaN\n",
       "3784   NaN\n",
       "3785   NaN\n",
       "3786   NaN\n",
       "3787   NaN\n",
       "Name: author_insertion_code, Length: 3788, dtype: float64"
      ]
     },
     "execution_count": 231,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dfrm6.author_insertion_code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['author_insertion_code', 'author_residue_number', 'chain_id',\n",
      "       'entity_id', 'residue_number', 'struct_asym_id', 'pdb_id'],\n",
      "      dtype='object')\n",
      "Index(['alternate_conformers', 'author_insertion_code',\n",
      "       'author_residue_number', 'chain_id', 'chem_comp_id', 'chem_comp_name',\n",
      "       'entity_id', 'residue_number', 'struct_asym_id', 'pdb_id'],\n",
      "      dtype='object')\n",
      "Index(['author_insertion_code', 'author_residue_number', 'multiple_conformers',\n",
      "       'observed_ratio', 'residue_name', 'residue_number', 'chain_id',\n",
      "       'struct_asym_id', 'entity_id', 'pdb_id'],\n",
      "      dtype='object')\n"
     ]
    }
   ],
   "source": [
    "pprint(dfrm7.columns & dfrm6.columns)\n",
    "pprint(dfrm7.columns)\n",
    "pprint(dfrm6.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>author_insertion_code</th>\n",
       "      <th>author_residue_number</th>\n",
       "      <th>multiple_conformers</th>\n",
       "      <th>observed_ratio</th>\n",
       "      <th>residue_name</th>\n",
       "      <th>residue_number</th>\n",
       "      <th>chain_id</th>\n",
       "      <th>struct_asym_id</th>\n",
       "      <th>entity_id</th>\n",
       "      <th>pdb_id</th>\n",
       "      <th>alternate_conformers</th>\n",
       "      <th>chem_comp_id</th>\n",
       "      <th>chem_comp_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td></td>\n",
       "      <td>125</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>29</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>0.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td></td>\n",
       "      <td>158</td>\n",
       "      <td>[{\"residue_name\":\"MSE\",\"alternate_conformers\":2}]</td>\n",
       "      <td>1.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>62</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>2.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>113</th>\n",
       "      <td></td>\n",
       "      <td>210</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>114</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>0.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>114</th>\n",
       "      <td></td>\n",
       "      <td>211</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>115</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>0.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>146</th>\n",
       "      <td></td>\n",
       "      <td>243</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>147</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>0.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>154</th>\n",
       "      <td></td>\n",
       "      <td>251</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>155</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>0.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>181</th>\n",
       "      <td></td>\n",
       "      <td>278</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>182</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>0.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>229</th>\n",
       "      <td></td>\n",
       "      <td>326</td>\n",
       "      <td>[{\"residue_name\":\"MSE\",\"alternate_conformers\":2}]</td>\n",
       "      <td>1.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>230</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>2.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>290</th>\n",
       "      <td></td>\n",
       "      <td>1555</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>13</td>\n",
       "      <td>L</td>\n",
       "      <td>C</td>\n",
       "      <td>3</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>0.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>294</th>\n",
       "      <td></td>\n",
       "      <td>3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>3</td>\n",
       "      <td>C</td>\n",
       "      <td>D</td>\n",
       "      <td>4</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>0.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>599</th>\n",
       "      <td></td>\n",
       "      <td>1127</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>CSD</td>\n",
       "      <td>297</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>3eu7</td>\n",
       "      <td>0.0</td>\n",
       "      <td>CSD</td>\n",
       "      <td>3-SULFINOALANINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>682</th>\n",
       "      <td></td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>3</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1y97</td>\n",
       "      <td>0.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>745</th>\n",
       "      <td></td>\n",
       "      <td>64</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>66</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1y97</td>\n",
       "      <td>0.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>906</th>\n",
       "      <td></td>\n",
       "      <td>225</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>227</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1y97</td>\n",
       "      <td>0.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>920</th>\n",
       "      <td></td>\n",
       "      <td>1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>3</td>\n",
       "      <td>B</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>1y97</td>\n",
       "      <td>0.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>983</th>\n",
       "      <td></td>\n",
       "      <td>64</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>66</td>\n",
       "      <td>B</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>1y97</td>\n",
       "      <td>0.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1144</th>\n",
       "      <td></td>\n",
       "      <td>225</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>227</td>\n",
       "      <td>B</td>\n",
       "      <td>B</td>\n",
       "      <td>1</td>\n",
       "      <td>1y97</td>\n",
       "      <td>0.0</td>\n",
       "      <td>MSE</td>\n",
       "      <td>SELENOMETHIONINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1659</th>\n",
       "      <td></td>\n",
       "      <td>207</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>TPO</td>\n",
       "      <td>14</td>\n",
       "      <td>C</td>\n",
       "      <td>C</td>\n",
       "      <td>2</td>\n",
       "      <td>6gy2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>TPO</td>\n",
       "      <td>PHOSPHOTHREONINE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1676</th>\n",
       "      <td></td>\n",
       "      <td>207</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>TPO</td>\n",
       "      <td>14</td>\n",
       "      <td>D</td>\n",
       "      <td>D</td>\n",
       "      <td>2</td>\n",
       "      <td>6gy2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>TPO</td>\n",
       "      <td>PHOSPHOTHREONINE</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     author_insertion_code  author_residue_number  \\\n",
       "28                                            125   \n",
       "61                                            158   \n",
       "113                                           210   \n",
       "114                                           211   \n",
       "146                                           243   \n",
       "154                                           251   \n",
       "181                                           278   \n",
       "229                                           326   \n",
       "290                                          1555   \n",
       "294                                             3   \n",
       "599                                          1127   \n",
       "682                                             1   \n",
       "745                                            64   \n",
       "906                                           225   \n",
       "920                                             1   \n",
       "983                                            64   \n",
       "1144                                          225   \n",
       "1659                                          207   \n",
       "1676                                          207   \n",
       "\n",
       "                                    multiple_conformers  observed_ratio  \\\n",
       "28                                                  NaN             1.0   \n",
       "61    [{\"residue_name\":\"MSE\",\"alternate_conformers\":2}]             1.0   \n",
       "113                                                 NaN             1.0   \n",
       "114                                                 NaN             1.0   \n",
       "146                                                 NaN             1.0   \n",
       "154                                                 NaN             1.0   \n",
       "181                                                 NaN             0.0   \n",
       "229   [{\"residue_name\":\"MSE\",\"alternate_conformers\":2}]             1.0   \n",
       "290                                                 NaN             0.0   \n",
       "294                                                 NaN             1.0   \n",
       "599                                                 NaN             1.0   \n",
       "682                                                 NaN             1.0   \n",
       "745                                                 NaN             1.0   \n",
       "906                                                 NaN             1.0   \n",
       "920                                                 NaN             0.0   \n",
       "983                                                 NaN             1.0   \n",
       "1144                                                NaN             1.0   \n",
       "1659                                                NaN             1.0   \n",
       "1676                                                NaN             1.0   \n",
       "\n",
       "     residue_name  residue_number chain_id struct_asym_id  entity_id pdb_id  \\\n",
       "28            MSE              29        A              A          1   1n0w   \n",
       "61            MSE              62        A              A          1   1n0w   \n",
       "113           MSE             114        A              A          1   1n0w   \n",
       "114           MSE             115        A              A          1   1n0w   \n",
       "146           MSE             147        A              A          1   1n0w   \n",
       "154           MSE             155        A              A          1   1n0w   \n",
       "181           MSE             182        A              A          1   1n0w   \n",
       "229           MSE             230        A              A          1   1n0w   \n",
       "290           MSE              13        L              C          3   1n0w   \n",
       "294           MSE               3        C              D          4   1n0w   \n",
       "599           CSD             297        A              A          1   3eu7   \n",
       "682           MSE               3        A              A          1   1y97   \n",
       "745           MSE              66        A              A          1   1y97   \n",
       "906           MSE             227        A              A          1   1y97   \n",
       "920           MSE               3        B              B          1   1y97   \n",
       "983           MSE              66        B              B          1   1y97   \n",
       "1144          MSE             227        B              B          1   1y97   \n",
       "1659          TPO              14        C              C          2   6gy2   \n",
       "1676          TPO              14        D              D          2   6gy2   \n",
       "\n",
       "      alternate_conformers chem_comp_id    chem_comp_name  \n",
       "28                     0.0          MSE  SELENOMETHIONINE  \n",
       "61                     2.0          MSE  SELENOMETHIONINE  \n",
       "113                    0.0          MSE  SELENOMETHIONINE  \n",
       "114                    0.0          MSE  SELENOMETHIONINE  \n",
       "146                    0.0          MSE  SELENOMETHIONINE  \n",
       "154                    0.0          MSE  SELENOMETHIONINE  \n",
       "181                    0.0          MSE  SELENOMETHIONINE  \n",
       "229                    2.0          MSE  SELENOMETHIONINE  \n",
       "290                    0.0          MSE  SELENOMETHIONINE  \n",
       "294                    0.0          MSE  SELENOMETHIONINE  \n",
       "599                    0.0          CSD  3-SULFINOALANINE  \n",
       "682                    0.0          MSE  SELENOMETHIONINE  \n",
       "745                    0.0          MSE  SELENOMETHIONINE  \n",
       "906                    0.0          MSE  SELENOMETHIONINE  \n",
       "920                    0.0          MSE  SELENOMETHIONINE  \n",
       "983                    0.0          MSE  SELENOMETHIONINE  \n",
       "1144                   0.0          MSE  SELENOMETHIONINE  \n",
       "1659                   0.0          TPO  PHOSPHOTHREONINE  \n",
       "1676                   0.0          TPO  PHOSPHOTHREONINE  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dfrm67 = pd.merge(dfrm6, dfrm7, how='left')\n",
    "dfrm67.dropna(subset=['chem_comp_id'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>author_insertion_code</th>\n",
       "      <th>author_residue_number</th>\n",
       "      <th>multiple_conformers</th>\n",
       "      <th>observed_ratio</th>\n",
       "      <th>residue_name</th>\n",
       "      <th>residue_number</th>\n",
       "      <th>chain_id</th>\n",
       "      <th>struct_asym_id</th>\n",
       "      <th>entity_id</th>\n",
       "      <th>pdb_id</th>\n",
       "      <th>alternate_conformers</th>\n",
       "      <th>chem_comp_id</th>\n",
       "      <th>chem_comp_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td></td>\n",
       "      <td>97</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>SER</td>\n",
       "      <td>1</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td></td>\n",
       "      <td>98</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>GLU</td>\n",
       "      <td>2</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td>99</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>ILE</td>\n",
       "      <td>3</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td></td>\n",
       "      <td>100</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>ILE</td>\n",
       "      <td>4</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td></td>\n",
       "      <td>101</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>GLN</td>\n",
       "      <td>5</td>\n",
       "      <td>A</td>\n",
       "      <td>A</td>\n",
       "      <td>1</td>\n",
       "      <td>1n0w</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3757</th>\n",
       "      <td></td>\n",
       "      <td>2060</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>THR</td>\n",
       "      <td>11</td>\n",
       "      <td>O</td>\n",
       "      <td>O</td>\n",
       "      <td>3</td>\n",
       "      <td>6hqu</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3758</th>\n",
       "      <td></td>\n",
       "      <td>2061</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>ALA</td>\n",
       "      <td>12</td>\n",
       "      <td>O</td>\n",
       "      <td>O</td>\n",
       "      <td>3</td>\n",
       "      <td>6hqu</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3759</th>\n",
       "      <td></td>\n",
       "      <td>2062</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>SER</td>\n",
       "      <td>13</td>\n",
       "      <td>O</td>\n",
       "      <td>O</td>\n",
       "      <td>3</td>\n",
       "      <td>6hqu</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3760</th>\n",
       "      <td></td>\n",
       "      <td>2063</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>GLY</td>\n",
       "      <td>14</td>\n",
       "      <td>O</td>\n",
       "      <td>O</td>\n",
       "      <td>3</td>\n",
       "      <td>6hqu</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3761</th>\n",
       "      <td></td>\n",
       "      <td>2064</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.9</td>\n",
       "      <td>LYS</td>\n",
       "      <td>15</td>\n",
       "      <td>O</td>\n",
       "      <td>O</td>\n",
       "      <td>3</td>\n",
       "      <td>6hqu</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3762 rows × 13 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     author_insertion_code  author_residue_number multiple_conformers  \\\n",
       "0                                              97                 NaN   \n",
       "1                                              98                 NaN   \n",
       "2                                              99                 NaN   \n",
       "3                                             100                 NaN   \n",
       "4                                             101                 NaN   \n",
       "...                    ...                    ...                 ...   \n",
       "3757                                         2060                 NaN   \n",
       "3758                                         2061                 NaN   \n",
       "3759                                         2062                 NaN   \n",
       "3760                                         2063                 NaN   \n",
       "3761                                         2064                 NaN   \n",
       "\n",
       "      observed_ratio residue_name  residue_number chain_id struct_asym_id  \\\n",
       "0                0.0          SER               1        A              A   \n",
       "1                1.0          GLU               2        A              A   \n",
       "2                1.0          ILE               3        A              A   \n",
       "3                1.0          ILE               4        A              A   \n",
       "4                1.0          GLN               5        A              A   \n",
       "...              ...          ...             ...      ...            ...   \n",
       "3757             1.0          THR              11        O              O   \n",
       "3758             1.0          ALA              12        O              O   \n",
       "3759             1.0          SER              13        O              O   \n",
       "3760             1.0          GLY              14        O              O   \n",
       "3761             0.9          LYS              15        O              O   \n",
       "\n",
       "      entity_id pdb_id  alternate_conformers chem_comp_id chem_comp_name  \n",
       "0             1   1n0w                   NaN          NaN            NaN  \n",
       "1             1   1n0w                   NaN          NaN            NaN  \n",
       "2             1   1n0w                   NaN          NaN            NaN  \n",
       "3             1   1n0w                   NaN          NaN            NaN  \n",
       "4             1   1n0w                   NaN          NaN            NaN  \n",
       "...         ...    ...                   ...          ...            ...  \n",
       "3757          3   6hqu                   NaN          NaN            NaN  \n",
       "3758          3   6hqu                   NaN          NaN            NaN  \n",
       "3759          3   6hqu                   NaN          NaN            NaN  \n",
       "3760          3   6hqu                   NaN          NaN            NaN  \n",
       "3761          3   6hqu                   NaN          NaN            NaN  \n",
       "\n",
       "[3762 rows x 13 columns]"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pro_dfrm67 = pd.merge(dfrm67, pro_dfrm)\n",
    "pro_dfrm67"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Get Observed RAW Residues\n",
    "\n",
    "```py\n",
    "dfrm67 = pd.merge(dfrm6, dfrm7, how='left')\n",
    "pro_dfrm67 = pd.merge(pro_dfrm, dfrm67)\n",
    "\n",
    "def yieldObserved(dfrm, group_col=['pdb_id','chain_id', 'entity_id']):\n",
    "    groups = dfrm.groupby(group_col)\n",
    "    mod = j.dropna(subset=['chem_comp_id'])\n",
    "    for i, j in groups:\n",
    "        yield i, len(j[j.observed_ratio.gt(0)]), len(mod[mod.observed_ratio.gt(0)])\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('1n0w', 4, 3, [(1, 'A', 203)], 1)\n",
      "('1y97', 2, 0, [(1, 'A', 203), (1, 'A', 211), (1, 'B', 209)], 1)\n",
      "('3eu7', 2, 1, [(1, 'A', 203), (1, 'A', 211), (1, 'B', 209), (1, 'A', 312)], 1)\n",
      "('6gy2', 4, 2, [(1, 'A', 203), (1, 'A', 211), (1, 'B', 209), (1, 'A', 312), (1, 'A', 224), (1, 'B', 224)], 1)\n",
      "('6hqu', 15, 7, [(1, 'A', 203), (1, 'A', 211), (1, 'B', 209), (1, 'A', 312), (1, 'A', 224), (1, 'B', 224), (1, 'A', 208), (1, 'B', 219), (1, 'C', 210), (1, 'D', 208), (1, 'E', 212), (1, 'F', 204), (1, 'G', 194), (1, 'H', 192)], 1)\n"
     ]
    }
   ],
   "source": [
    "def yieldObserved(dfrm):\n",
    "    groups = dfrm.groupby(['pdb_id', 'entity_id', 'chain_id'])\n",
    "    for i, j in groups:\n",
    "        mod = j.dropna(subset=['chem_comp_id'])\n",
    "        yield i, len(j[j.observed_ratio.gt(0)]), len(mod[mod.observed_ratio.gt(0)])      \n",
    "\n",
    "data = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))\n",
    "\n",
    "for (pdb_id, entity_id, chain_id), observed_res_count, observed_modified_res_count in yieldObserved(pro_dfrm67):\n",
    "    data[pdb_id][entity_id][chain_id]['ob_res'] = observed_res_count\n",
    "    data[pdb_id][entity_id][chain_id]['ob_moded_res'] = observed_modified_res_count\n",
    "\n",
    "# json.loads(json.dumps(data))\n",
    "for i in traverse(data, ('ob_res', 'ob_moded_res')):\n",
    "    print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# json.loads(json.dumps(data))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "sns.set_style('darkgrid')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1n0w_A_1->atom_len:203\n",
      "1n0w_B_2->atom_len:33\n",
      "1n0w_C_4->atom_len:3\n",
      "1n0w_L_3->atom_len:3\n",
      "1y97_A_1->atom_len:211\n",
      "1y97_B_1->atom_len:209\n",
      "3eu7_A_1->atom_len:312\n",
      "3eu7_X_2->atom_len:14\n",
      "6gy2_A_1->atom_len:224\n",
      "6gy2_B_1->atom_len:224\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlEAAAPwCAYAAAARSyshAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nOzde5hcVZ3v/3dd+pJOOulcmtwg6QBhEZEEQtAESAQUFUYEQUVRPAMiIDJnHJxR4YjzzByHGc8j4lEMSiA6g3gUhPiTO+p4SQIBjEEuphcESEIS0rl20t1JV3dV1++PqkpXOtXdu3ftqr1r1+f1PD521e5Uf/muVdXfXmvttSLpdBoRERERGZmo3wGIiIiIVCIVUSIiIiIuqIgSERERcUFFlIiIiIgLKqJEREREXIiX+wf29fWlU6nS3BEYi0Uo1WuLN9RGwac2Cj61UfCpjYLPaRvV1MR2Ac2FrpW9iEql0rS3HyjJazc1NZTstcUbaqPgUxsFn9oo+NRGwee0jZqbGzcNdk3TeSIiIiIuqIgSERERccHRdJ4x5t3AN621Zw94/kLg60ASWG6tXeZ5hFLQ4+vbWLpyI20dCSY31nH94hbOnzPZ77B8pZxUnvw2a6yLEYlE2N+drPr2K7Yv670gfqjGfjdsEWWM+TJwBdA14Pka4Hbg9Oy11caYh62120sRqPR7fH0btz71Gt3JPgC2dyS49anXAELfYQejnFSegW22P5E6dK2a26/Yvqz3gvihWvtdZLiz84wxlwIvAvdaaxfmPT8X+D/W2g9mH98OPG2tfWCo1+vtTaXLsbC8J9nHtff/hZ2dPSX5WX7a2Zmgr0CzRSPQPKau/AGNQDQaoa9Q8EWq5JwETanaaKDB2uywWALSfjPHj+KOj55MJBJx9e+Xr9nMQy++7eh7nfTlodooiO+FKxYczWXzp/vys0tlw64uvvKrv5LIFg0Dlet9FBR+9LsIcOM5x3HO7Emu/v0IFpavBRYUujbsSJS19kFjTEuBS2OBfXmPO4Bxw71eLBahqalhuG9zJRaLHnrtt/cd5OW3OzhtRhMtk0aX5Of55cE/by34fF8aznLZmcolEolQikOvKzknQVOqNhposDbLF4T2s9s7eG5zOw2No6iLu1tG+udt+0ml07znhIJ3SR/GSV8eqo2C9l74zfod/GV7J9eW6HPfL1s3trN570E+8I7JjKk/8ldpud5HQeFHv4tGIpijm1zXFPk1g1vFbHGwH2jMe9wItA/3j8q1xcGuvQcB+PBJk7ngHeEaSlz92i62dySOeH5KYx1fPec4HyJyrlS3/VZyToKmXLdmD9Zm+YLQfvf9aQsvb9vPzt2djKlz95F5oDvJcRMbHP23OOnLQ7VR0N4Lf926jwPdvaG73b+9I/M75oYzZzJlbP0R16ttiwM/+53bPI9gJGrQa8XcnbcemG2MmWCMqQWWAM8U8XqeSqQyQ6xu/3IMsusXt1A/4L+rPh7l+sUt/gQUAMpJ5SnUZvmC0n612Rh7UoWnbZzoSfVRG3P2WVRsXw7ae6EuHj30eRwmiWRmlCmMv2PcCFq/K5cR/1lljLkcGGOtvcsYcyPwJJlibLm1dvjx+TLpyc5T1zj84KokuUV61XYXxFCUk8ozsM2CendebSyzDqpnkLUvTvQk+w4VY8Mpti8H7b1QE4sOum6okuWK6jD+jnEjaP2uXBwVUdbajcDC7Nc/zXv+YeDhkkRWpN7cSFRIO/j5cyaHvnOOlHJSeSqhzfpHotyvbxnJSBQUn5cg5bUuHqUzkfQ7DM/1hni2w60g9btyCW3r5/7ycfrXn4hIIbk/xIoaiUo5H4kKm9qQjkQlkn1EgHjU3R2bEg6hfVfnhlqr9YNLRLyR+wwpZl1PT7IvtKPiw6mNR4taTxZUuSlat9teSDiE9l2d+6sxt55BRMSN3JqX3iJHoqp17UxtLFLUKF5Q
jXSKVsIptD0gt35BnVxEipEbQSp6JCpenX/Q1caiRa0nC6pqnqKVfqHtAbm/fLToT0SKcWhhucvRlGRfmlS6epcW1Maj4RyJSvZRp5mOqhfad3VCt5+KiAdyo9lu1/X0Ly2ozs+i2lh494nS7xcJbQ/Q7aci4oViN9s8dJNLlf7CrY1HSfWlSYXsHLleTecJIS6iElX+15+IeKPYzTZ7qny7ldyast6QjUYlUn36I13CW0T171iuOWsRce/QFgdJdyMp1T4SVXMof+EqonqS1XvHpfQLbQ/oSaWpjUW0h4eIFKW2yJGUat+zLrf4OmwjUb2p6t37S/qFtgfo9lMR8UJdsWuiqnxpgReblQZRYgTnIUp4hbYH9CS1EZqIFC93rIfb6aj+I6iqc1T80N2NLqdDgyqz2WZ1tqn0C22Vod1kRcQLkUiEunjU9XRUb5Vv/FvsFhFB1ZNKayRKQlxEaahVRDxSzCG6iSrfbqXYzUqDSrMdAmEuojQSJSIeqYlFil4TVa13coV3JEq/YyTERZQW/YmIV+ri7s9/O7Txb5X+wi12s9Kg0myHQIiLqMztp1r0JyLFq425P/8toc02gXBN56XTad0BLkCIi6hEUov+RMQbxRyiW+37RNWGcLPNVF+avnT1ji5Kv9D2gJ6UdpMVEW/UxqJFnJ2XuzuvOkfGaw5tthmeLQ5ybaoTMSS0VUaPzjUSEY/Uxosooqp8s826EG62mWtT/Y6R0PYA3X4qIl6pK2JNVLUfQFwbwjVRiSo/D1H6hbYH6PZTEfFKZosD9wcQx6MRolV6jmeueAzT2Xm9Vb7OTfqFtgfo9lMR8UpdkQvLq3naJ7c2NUwLyxNVPkUr/ULbAzQSJSJeqY1HXa/pSVT50oJ4NEIs6n6z0iCq9jsupV9oe0BmJKo6h89FxFs1Ra6Jqva7uGpjkVCNRPXfLFDd7SohLaKSfWlSaQ21iog36mLuDyCu9uk8yHwWh2uLA41ESUYoe0BvlR/4KSLeqo27P4C4J6WNf4tZUxZEPclMQajNNiU+3DcYY6LAUmAekACuttZuyLt+I/BZYGf2qWuttbYEsTqWqPIDP0XEW7VFHkBc7aPiNTH3a8qCKPffot8xMmwRBVwM1FtrFxljFgK3ARflXZ8PfMZau7YUAbqh209FxEu18Sh96cxSgXh0ZOtgdJNLJn+h2uKgyvf+kn5OesBZwBMA1to1wIIB108DbjLGrDLG3ORxfK7kRqI01CoiXihmw0htt5L5LA7TwvKEloxIlpORqLHAvrzHKWNM3FqbzD7+GfB9YD+wwhjzIWvtI4O9WCwWoampwXXAQ4nFojQ1NbCzJ9PBm8bWl+xniTu5NpLgUhsdaVxjPQD1o+toGl07on+bAhrqazzNaaW10ai6OH2R0n32l1usNvOrc9L40TQ11hX+ngpro2rkRRs5KaL2A415j6O5AsoYEwG+Y63dl338KHAqMGgRlUqlaW8/4D7iITQ1NdDefoDde7sASCZ6S/azxJ1cG0lwqY2OlOrJ/M24a08X0d7kMN99uIM9SaLpWk9zWmltFCPNwe7wfB7v6+gGoLsrQXsqVfB7Kq2NqpHTNmpubhz0mpOxyNXABQDZNVEv5V0bC7xsjBmTLajOBXxfG3Xo1HQNtYqIB3LTNm4Wl2cWllf3fkKZzUpDtMWB1kRJlpORqBXAecaYp4EIcKUx5nJgjLX2LmPMzcDvyNy591tr7WOlC9eZaj81XUS8VczRJdW+YzlkPotDtcXBobvzqrs4FgdFlLW2D7huwNOtedfvBe71OK6i6IRtEfFS7rPEzR1mvdonKlNEhejuvJ5UmppY9R4qLf1C+c7W7aci4qW67BFSbkaitGN55rM4VCNRGl2UrFD2gtxfPNriQES8UFvEmihN52V3LA/VSJQKY8kIZS84tGO5DiAWEQ8c2idqhIuj+9Jpkn3pqi+iasI2nZfs027lAoS0iOrVSJSIeMjtZpu6iysjjAvLNRIl
ENIiKqEtDkTEQ4em80ZaROkIKiCzpqwnlSadDsc2B5qilZxQ9gJtcSAiXuqfzhtpEZX9g67Kb4WvOXR3YziKqN7s3XkioawyenTCtoh4yO3Ccv1Bl1HMZqVBlNB0nmSFshf0JPuIRyPERnjauohIIXUuN9vMFVHV/gu3tojNSoNIWxxITih7QU9KHVxEvJObuhnpmqiERsUB99OhQdWT7Kv6dW6SEcpekFAHFxEP5T5PRrqmp1cLywH3C/ODSn+oS04oe0FvSgd+ioh3opEINbHIoZElp3LTV9W+3Uoxm5UGUU9Kf6hLRih7gUaiRMRrbvY60kG1GbUup0ODKrMmqrrbVDJCWWn0pLRDsIh4y80huj3JzPSfFpa72/E9qPQ7RnJC2Qt6dfupiHjMzSG62mwzoy5sa6I02yFZoewFCZ1rJCIeq41FtE+US7nP45GuKQuqhBaWS1Yoe4H+ShARr9XGoyOejjo0ElXlv3D7726s/CIq1Zcm1ZfW7xgBwlpEpfqq/m4YEfGWq4XlOoAYcL9ZaRDpgHvJF8pe0JPqq/q7YUTEW7Wx6IinozQSleF2s9IgyhWCNVVeGEtGKHtBT1ILy0XEW7XxKL1aWO5K/9l5lX93Xv9IlP5Ql7AWUbr9VEQ8Vhd3s8VBH7EIxKv8HM8wbbaZUGEseULZC7SwXES8VhuLjnhNTyKpBcjQv34oDNN5ub2/9Ie6QFiLKN1+KiIeq4lFRnx3Wa8+iwCIRSNECMdIlNa5Sb5Q9gId+yIiXquLuxiJ0hlrAEQiEVeblQaR7riUfKHrBX3pNMm+tG4/FRFPZY59GeE+UUmNROW4WVMWRLn/Bt28JBDCIir3V4K2OBARL9XGo5rOK0KNi7MHg6j/UGm1q4SxiNKdEyJSAu4Wlms6L6cuFgnVdJ5mOwRCWUTp1HQR8V5tPEoye+SHU7rJpV9tPEoiWfn7RCW0JkryxIf7BmNMFFgKzAMSwNXW2g151y8Evg4kgeXW2mUlitWR/uk8dXAR8U6uGOpN9RGLxhz9m8x2K1paAN5P5z2+vo2lKzfS1pFgcmMd1y9u4fw5kz17/cH0T+epXcVBEQVcDNRbaxcZYxYCtwEXARhjaoDbgdOBLmC1MeZha+32UgU8HA21ikgp5EYe9h7spdHhaFR3so+mUTWlDKti1MWjHOxN0ZlIFv1av7Y7uO2/XyeRnXnY3pHg3558le7eFOeZo4p+/aF0JlKAZjskw0kRdRbwBIC1do0xZkHetTnABmvtXgBjzCpgMfCA14E6lv3joKHW2V+KIiJONNRkfml+eNlzI/p3586eVIpwKs6omhjPb27nnDueLsnrJ1Jpbv31Bm799Ybhv9kD9XH9jhFnRdRYYF/e45QxJm6tTRa41gGMG+rFYrEITU0NIw7UiVgsyrxZE1n6yVM5xzQT12hU4MRi0ZK1v3hDbVTYJafPIFoTH/EdektmN3uez0pso6996B2seWO3J6/170/YQa/d9EHjyc8YytRxozhmytghv6cS26jaeNFGToqo/UBj3uNotoAqdK0RaB/qxVKpNO3tB0YUpFNNTQ3s23eQ06c10tnRXZKfIcVpamooWfuLN9RGg7vwxGZX/87rfFZiG00bFeeSk7xZs/Sj1RvZ3pE44vkpjXWe/YzhDJf/SmyjauO0jZqbGwe95mSoZjVwAUB2TdRLedfWA7ONMROMMbXAEuAZB68pIiLiyvWLW6gfsCapPh7l+sUt/gQkVcvJSNQK4DxjzNNkVhxdaYy5HBhjrb3LGHMj8CSZgmy5tXZr6cIVEZFql7sLz4+780TyRdLp8u7b0dubSpdyOk/Dp8GmNgo+tVHwqY2CT20UfCOYzlsLLCh0TSuvRURERFwo+0gUsBPYVO4fKiIiIuLCTKDgXSV+FFEiIiIiFU/TeSIiIiIuqIgSERERcUFFlIiIiIgLKqJEREREXFARJSIiIuKCiigRERERF5wc+xJ4xpgosBSYBySAq621G/yN
SgCMMeuAfdmHbwL/BvwYSAMvA1+w1vb5E111M8a8G/imtfZsY8zxFGgXY8zngGuBJPANa+0jvgVchQa00XzgYeC17OU7rbU/Vxv5wxhTAywHWoA64BvAX9H7KBAGaZ8tePweCstI1MVAvbV2EfBV4Daf4xHAGFMPYK09O/u/K4FvA1+z1i4mcxbjRX7GWK2MMV8G7gbqs08d0S7GmCnA/wTOBD4A/Lsxps6PeKtRgTaaD3w77/30c7WRrz4N7M6+Z84H7kDvoyAp1D6ev4dCMRIFnAU8AWCtXWOMKXjGjZTdPKDBGPMUmb52M3Aa8Ifs9ceB95M55FrK63XgEuDe7ONC7ZICVltrE0DCGLMBmAs8X+ZYq1WhNjLGmIvI/CX9ReBdqI388gDwi7zHSfQ+CpLB2sfT91BYRqLG0j9lBJAyxoSlQKxkB4BvkanurwPuAyLW2tw2+R3AOJ9iq2rW2geB3rynCrXLwPeV2quMCrTRc8A/WWuXAG8A/4zayDfW2k5rbYcxppHML+uvofdRYAzSPp6/h8JSRO0HGvMeR621Sb+CkUNeBX5irU1ba18FdgOT8643Au2+RCYD5a9Ly7XLwPeV2stfK6y1a3NfA6eiNvKVMeYY4HfAvdban6L3UaAUaB/P30NhKaJWAxcAGGMWAi/5G45kXUV2fZoxZhqZiv8pY8zZ2evnAyv9CU0GWFegXZ4DFhtj6o0x44A5ZBbLij+eNMa8K/v1e4G1qI18Y4yZDDwFfMVauzz7tN5HATFI+3j+HgrLlNcK4DxjzNNkFvNd6XM8knEP8GNjzCoyd6tcBewClhljaoH1HD5nLf75EgPaxVqbMsZ8l8wvgijwv6y13X4GWeU+D9xhjOkBtgPXWGv3q418czMwHrjFGHNL9rm/B76r91EgFGqfG4HvePkeiqTT6eG/S0REREQOE5bpPBEREZGyKvt0Xl9fXzqVcjb6FYtFcPq91Ux5ck65ckZ5ck65ck65ckZ5cq4cuaqpie0Cmgtdc1RE5e+aO+D5C4Gvk9l/Ybm1dtlwr5VKpWlvP+Dkx9LU1OD4e6uZ8uSccuWM8uSccuWccuWM8uRcOXLV3Ny4abBrwxZR2V1zrwC6BjxfA9wOnJ69ttoY87C1dntx4VamdDrNtv3d9CbL/9dDY28fHfu1TtEJ5coZ5ck55co55cqZasvTlLF11NfE/A7DFScjUQN3zc2ZA2yw1u4FyN6BtZjMLqFV54+v7+Ef/79X/A5DRESkokxoqOEzpx/DpfOmVlwxNWwRZa190BjTUuCSq10+Y7EITU0NjoKLxaKOv9dv63dtpjYe5T8+8k4iRMr6s6PRCH19mj93QrlyRnlyTrlyTrlypprylOzr45cvbOM7f3iDn6zdwjWLj+WTpx/juJjyu04oZmG5q10+w7om6i+b93L8pNEsntFU9p9dSXnym3LljPLknHLlnHLlTLXl6eyW8bywZR8/fGYTtz7eyl1/fIO/fdcxXDx3KnXxoTcRKNOaqEGvFbPFwXpgtjFmQnZjsSXAM0W8XsVKp9O07ujkxKPG+B2KiIhIxTnl6HHc+bG5/ODjczlm/Ci+9bvXueSe53jghW30JPuGfwGfjLiIMsZcboy5xlrbS2b3zyfJFE/LrbVbvQ6wEmzd101nIsWJk1VEiYiIuHXaMU388ONzWfqxk5k2rp7/89sNXLL8eR78yzZ6U8ErphxN51lrNwILs1//NO/5h4GHSxJZBVnf1gnAHBVRIiIiRYlEIpw+YzwLjmniuU3t/PDpTfzHbzbw42ff4qqFM7jwpMnEY8HYKzwYUVS41rZO4tEIx04c7XcoIiIioRCJRHh3y3ju+eQ8vnvpO5k4upZbf/0aly5/nl+9tJ1kAEamVER5oLWtg+MnjaZ2mAVwIiIiMjKRSIRFLRP40eWn8J2PvJNxo2r430+9ykd/9Cde2bZv+BcoobIf+xI2uUXl586e5HcoIiIioRWJRDjz2AmcMWs8K9/Y
w8/+vJXNew4yvaHGt5hURBXp7f0J9ncntahcRESkDCKRCEuOm8iS4yb6vh2E5p+K1NrWAcCJkwffR0JERETCR0VUkda3dRKLRjh+khaVi4iIVBMVUUVq3dHJsRMbht1VVURERMJFv/mLkE6naW3r1P5QIiIiVUhFVBHaOhK0H+zVeigREZEqpCKqCK3Zncp1Zp6IiEj1URFVhPU7OolFYHazFpWLiIhUGxVRRWht62DWxNHU18T8DkVERETKTEWUS7lF5UaLykVERKqSiiiXdnb2sOdAL3O0HkpERKQqqYhyaX1uUblGokRERKqSiiiX7I4OohE4QSNRIiIiVUlFlEvr2zqZOaGBUVpULiIiUpVURLmkncpFRESqm4ooF3Z1JtjV1YPRVJ6IiEjVUhHlQuuOzKLyOTruRUREpGqpiHJhfVsnEeCEo7RTuYiISLVSEeWCbetkxvhRjK6N+x2KiIiI+ERFlAvr2zq0P5SIiEiVUxE1QnsO9LCjs0froURERKqciqgRatVO5SIiIoKKqBHLFVHa3kBERKS6qYgaofVtHcwYP4oxdVpULiIiUs1URI2Q3dGpUSgRERFRETUS7Qd7eXt/Qse9iIiIiIqokWht6wC0qFxERERg2IU9xpgosBSYBySAq621G/Ku3wh8FtiZfepaa60tQay+06JyERERyXGyOvpioN5au8gYsxC4Dbgo7/p84DPW2rWlCDBIWnd0Mn1cPWPra/wORURERHzmpIg6C3gCwFq7xhizYMD104CbjDFTgEettf8+1IvFYhGamhocBReLRR1/bznYnV3MPbopUDFB8PIUZMqVM8qTc8qVc8qVM8qTc37nykkRNRbYl/c4ZYyJW2uT2cc/A74P7AdWGGM+ZK19ZLAXS6XStLcfcBRcU1OD4+8ttf3dvWzZe5CLTpocmJhygpSnoFOunFGenFOunFOunFGenCtHrpqbBz+hxMnC8v1A/itEcwWUMSYCfMdau8ta2wM8CpxaRKyBlVsPpeNeREREBJwVUauBCwCya6Jeyrs2FnjZGDMmW1CdC4RybdShReW6M09ERERwNp23AjjPGPM0EAGuNMZcDoyx1t5ljLkZ+B2ZO/d+a619rHTh+qd1RydTx9bRNEqLykVERMRBEWWt7QOuG/B0a971e4F7PY4rcFrbOjhRU3kiIiKSpc02HehMJHmrvVs7lYuIiMghKqIcsDu0yaaIiIgcTkWUA+sP3ZmnIkpEREQyVEQ50NrWweTGOsY31PodioiIiASEiigHWts6OVFTeSIiIpJHRdQwunqSbN57kBM1lSciIiJ5VEQNw+7oJI12KhcREZHDqYgahnYqFxERkUJURA2jta2T5jG1TBqtReUiIiLST0XUMLSoXERERApRETWEg70pNu45oEXlIiIicgQVUUN4NbuoXGfmiYiIyEAqooagncpFRERkMCqihtC6o5MJDTVaVC4iIiJHUBE1hNa2DuZMbiQSifgdioiIiASMiqhBdPemeHO3FpWLiIhIYSqiBvHazi760mh7AxERESlIRdQgcovKNRIlIiIihaiIGkRrWwfjR9UwubHO71BEREQkgFREDaJ1Rydm8hgtKhcREZGCVEQVkEj28cbuA9ofSkRERAalIqqADTs7SfWltVO5iIiIDEpFVAGtO7KLynVnnoiIiAxCRVQB69s6GVcfZ+pYLSoXERGRwlREFdDa1smJWlQuIiIiQ1ARNUBPso/Xd3VhjtJ6KBERERmciqgBXt/dRbIvrTvzREREZEgqogbQTuUiIiLihIqoAWxbJ411caaPq/c7FBEREQmw+HDfYIyJAkuBeUACuNpauyHv+oXA14EksNxau6xEsZbF+rYO7VQuIiIiw3IyEnUxUG+tXQR8Fbgtd8EYUwPcDrwfeA9wjTFmSikCLYfeVB8bdnUxR/tDiYiIyDCGHYkCzgKeALDWrjHGLMi7NgfYYK3dC2CMWQUsBh7wOlCnuntT3PJYK3sO9I74
3/am+uhNpbUeSkRERIblpIgaC+zLe5wyxsSttckC1zqAcUO9WCwWoampwVFwsVjU8ffmdPemGNtQS096RP/skPMmNPD+udNoaqh19wI+cJOnaqVcOaM8OadcOadcOaM8Oed3rpwUUfuB/E2TotkCqtC1RqB9qBdLpdK0tx9wFFxTU4Pj7813y3mzR/xvDtOTpL0nOfz3BYTbPFUj5coZ5ck55co55coZ5cm5cuSquXnwfSOdrIlaDVwAYIxZCLyUd209MNsYM8EYUwssAZ5xH6qIiIhIZXAyErUCOM8Y8zQQAa40xlwOjLHW3mWMuRF4kkxBttxau7V04YqIiIgEQySddrl4yL2dwKZy/1ARERERF2YCzYUu+FFEiYiIiFQ87VguIiIi4oKKKBEREREXVESJiIiIuKAiSkRERMQFFVEiIiIiLqiIEhEREXHByWabZWWMiQJLgXlAArjaWrvB36iCyxizjv7zC9+01l7pZzxBY4x5N/BNa+3ZxpjjgR8DaeBl4AvW2j4/4wuSAbmaDzwMvJa9fKe19uf+RRcMxpgaYDnQAtQB3wD+ivrVYQbJ0xbUp45gjIkBywADpIAryWxs/WPUpw4zSK7G4WO/ClwRBVwM1FtrF2WPmbkNuMjnmALJGFMPYK092+dQAskY82XgCqAr+9S3ga9Za39vjPkBmX61wq/4gqRAruYD37bW3uZfVIH0aWC3tfYKY8xEYB3wAupXAxXK07+iPlXIhQDW2jONMWeT+ZyKoD5VSKFcPYyP/SqI03lnAU8AWGvXAAv8DSfQ5gENxpinjDH/nS06pd/rwCV5j08D/pD9+nHgfWWPKLgK5epvjDF/NMbcY4wZ/ATO6vIAcEve4yTqV4UMlif1qQGstb8Ersk+nAm0oT5V0BC58q1fBbGIGkv/9BRAyhgTxBGzIDgAfAv4AHAdcJ9y1c9a+yDQm/dUxFqb26K/g8wwsFAwV88B/2StXQK8AfyzL4EFjLW201rbkf2g/gXwNdSvjjBIntSnBmGtTRpj/hP4Hpl8qU8NokCufO1XQSyi9gP5lWTUWpv0K5iAexX4ibU2ba19FdgNTPU5piDLX1PQCLT7FUgFWGGtXZv7GjjVz2CCxBhzDPA74F5r7U9Rv1WhepEAACAASURBVCqoQJ7Up4Zgrf0fwAlk1vyMyrukPjXAgFw95We/CmIRtRq4ACA7PfWSv+EE2lVk1oxhjJlGZhTvbV8jCrZ12Xl0gPOBlT7GEnRPGmPelf36vcDaob65WhhjJgNPAV+x1i7PPq1+NcAgeVKfKsAYc4Ux5qbswwNkivI/qU8daZBcPeRnvwri1M8K4DxjzNNkFtfpbrPB3QP82BizisxdHFdp1G5IXwKWGWNqgfVkhoKlsM8DdxhjeoDt9K9DqHY3A+OBW4wxuTU/fw98V/3qMIXydCPwHfWpIzwE/MgY80egBvgimX6kz6ojFcrVW/j4WRVJp9PDf5eIiIiIHKbsI1F9fX3pVKr0hVssFqEcP6daKJ/eU069pXx6Tzn1lvLpvXLktKYmtgtoLnSt7EVUKpWmvf1AyX9OU1NDWX5OtVA+vaecekv59J5y6i3l03vlyGlzc+Omwa4FcWG5iIiISOCpiBIRERFxwdF0Xv6ZWgOevxD4OpndaJdba5d5HqGIiJTV4+vbWLpyI20dCSY31nH94hbOnzPZ77BEAmfYkajsmVp3A/UDnq8BbgfeD7wHuMYYM6UUQYqISHk8vr6NW596je0dCdLA9o4Etz71Go+vb/M7NJHAcTISlTtT694Bz88BNlhr9wJk9ypaTObMJN9096b458ct+3tSJJMpP0MJlXg8pnx6TDn1lvLpjVe2d9A74G6n7mQfS1du1GiUyADDFlHW2geNMS0FLg08487R+T6xWISmpgbHAY5Ud2+KhvoaDqbS1MQiJfs51SeifHpOOfWW8umFgQVUzvaOBOPGjSISUY7disWiJf39V438zmkxWxwMPOPO0fk+
5dji4J/fP1u3knpM+fSecuot5dMbF971LNs7EgWvXfT91Xxi/nTed0IztXHdlzRS6qPeK9MWB4NeK+ZdsB6YbYyZkN2afgnwTBGvJyIiPrt+cQv1AwqkuliED510FAezyyUuXPYsP1i9kZ2dhYstkWox4pEoY8zlwBhr7V3GmBuBJ8kUY8uttVu9DlBERMont+6p0N15fek0z23ay8/XbWP5ms38+Lm3eN8Jk7js1Om8c2qjpvqk6pT97Lze3lRaO5ZXHuXTe8qpt5RP7w2V07f2HuSBF7bxq5e309WTYs7kMZrqG4b6qPfKNJ23FlhQ6Jp6uoiIjNgx40dx4znH8ei17+bL7z2eAz39U30/XL2RXZrqkypQ9rPzREQkPEbXxvnYKdO4dN7UQ1N996zZzI801SdVQEWUiIgULRqJsLBlAgtbJhw21fdk607eMaWRy06dpqk+CR31ZhER8VT+VN8/nXs8XYmkpvoklDQSJSIiJTG6Ns7HT53GR0/pn+q7O2+q7xPzp/POqWP9DlPENRVRIiJSUvlTfZuzU30Pa6pPQkA9VkREymbG+FF8aZCpvrue1lSfVBaNRImISNkVmupb9sxmfvTsW7xXU31SIVREiYiIb4aa6jtpSiMf11SfBJh6pYiIBMLAqb5OTfVJwGkkSkREAiV/qu/ZTXv5+Z811SfBpCJKREQCKRqJsKhlAos01ScBpZ4nIiKBd/hU33F0aKpPAkAjUSIiUjEyU33T+egp046Y6nufaeayU6dpqk/KRkWUiIhUnIFTffev28ojr7TxxPodnDSlkcvmZ6b6amKacJHSUe8SEZGKNmP8KP7x3OMPm+r7+mOWC5c9l5nq6+rxO0QJKY1EiYhIKORP9a3ZuJf71x0+1feJU6dxkqb6xEMqokREJFSikQhnzJrAGbMmsGnPAR54Yduhqb53Tu2/q09TfVIs9SAREQmtmRMaDpvq29/dP9W37OlNmuqTomgkSkREQq/QVN9dz2xi+bObNdUnrqmIEhGRqjHcVN9lp07nvSdM0lSfOKJeIiIiVSk31ffINe/mH8/JTPXd8lirpvrEMY1EiYhIVRtTF+ey+dP52KmZqb6fr9t6aKrvPNPMZfOnc9KURr/DlABSESUiIsLgU32Pa6pPBqGeICIiMoCm+sQJjUSJiIgMIn+q75mNe7lfU32SR0WUiIjIMKKRCGfOmsCZeVN9D7+sqb5qp9YWEREZgfwNPI+Y6ntmE7s11Vc1NBIlIiLiwsCpvp//eSt3Pb2J5Ws01Vcthi2ijDFRYCkwD0gAV1trN+RdvwL4J2Af8GNr7T0lilVERCRw8qf6Nu45wC/ypvpOzk71nXvCJL/DlBJwMp13MVBvrV0EfBW4LXfBGDMJ+AZwNvAe4FPGmBbvwxQREQm+lrypvi+dcxz7upN8LTvV973fbdBUX8g4mc47C3gCwFq7xhizIO/ascAL1to9AMaY54GFwMbBXiwWi9DU1OA6YKdisWhZfk61UD69p5x6S/n0nnLqXhNw3eSxXHP28azcsIv/WrOJ7/73Bu78w+v8zTuncsXCmcw9epzfYVY8v/uokyJqLJmpupyUMSZurU0CrwEnGWMmAx3Ae4FXh3qxVCpNe/sBt/E61tTUUJafUy2UT+8pp95SPr2nnHpj3lGjue3D72BPbx/LV77Bwy+38cu/bDtsqk939blTjj7a3Dz4ujYnrbYfyH+FaLaAwlq7F/gH4EFgOfBnYJfrSEVERELq2OYxh031tR/s5WuPtfLhZc9xt+7qq0hORqJWAxcC9xtjFgIv5S4YY+Jkpu+WZF/rN8DNJYhTREQkFMbUxfnE/Ol8PO+uvh8+nbeB56nTeYfu6qsIToqoFcB5xpingQhwpTHmcmCMtfYuY0wPsBboBm6z1mokSkREZBgD7+p7YF3mrL7H/rqDk6eO5bJTp2mqL+Ai6XS6rD+wtzeV1pqoyqN8ek859Zby6T3l1FtO8tmZSPLIK23cv24r
b7V3M2l0LZfOm8pH5k5l4ujaMkVaOcq0JmotsKDQNW22KSIiEhCHTfW9uZefrdNUX5CpiBIREQmYaCTCmcdO4MxjNdUXZCqiREREAqxlQgP/9N7j+fxZLTz8ShsPrNvK1x5rZdIfNNXnNxVRIiIiFWBMXZxPzp/OZQWm+t5vmvm4pvrKTkWUiIhIBTlsqm/3Ae5/YRuPvtLGo9mpvk/Mn8a5sycR11RfyamIEhERqVAtExv48nuP5/rsVN/967byvx5tpXlMLZfM1VRfqamIEhERqXDDTfVdNn86cyZrqs9rKqJERERCYqipvrnTsnf1aarPMyqiREREQmioqb7cXX0TGjTVVwwVUSIiIiGWP9X39Jt7+Pm6bfxg9SbuWaOpvmKpiBIREakC0UiEs46dyFnHTjw01ffIK9s11VcEFVEiIiJVJn+q71cvb+eBF7Zpqs8FFVEiIiJVakxdnMtPO5pPzJ+emer7c95U34lHcdmp0zTVNwQVUSIiIlVu0Km+V9o01TcEFVEiIiJyyMCpvvvXaapvMCqiRERE5Ai5qb7LTs1M9d2/TlN9A6mIEhERkUHFohEWHzeRxcdN5M3dB7h/3VYe/WubpvpQESUiIiIOzZrYwFfeN5vrz5rFw68cPtX30XnT+MjcKYyvoqk+FVEiIiIyIo31R0713bl6I/es2cR5Jx7FJ06dxolVMNWnIkpERERcGWqqb960sVw2fzrnHD8xtFN9KqJERESkaIWm+m5+ZD1Hjanl0pBO9amIEhEREc8MnOr7+bqth6b6cnf1hWWqT0WUiIiIeG6wqb5HQjTVpyJKRERESmqoqb6PnjKNi0+uzKk+FVEiIiJSFoWm+pau2sjdz2Sm+j5x6nTM5DF+h+mYiigREREpq/ypvjd2d3H/um08lp3qO2X6WD5+amVM9amIEhEREd8cO3E0X33fbL5QgVN9KqJERETEd/lTfavf3MPP/xz8qb5hiyhjTBRYCswDEsDV1toNedc/BXwJSAHLrbV3lihWERERCblYNMKS4yayJG+q79FXgjnV5ySCi4F6a+0i4KvAbQOufwt4H3Am8CVjzHhvQxQREZFqlJvqe+zahXzxPceyo7OHmx9Zz0V3P8ePnt1MVyLpa3xOpvPOAp4AsNauMcYsGHD9RWAckAQiQHqoF4vFIjQ1NbgIdWRisWhZfk61UD69p5x6S/n0nnLqLeXTvSbgC1PGct25s/n9qzv5rzWbWLpqI01j67lyUYtvcTkposYC+/Iep4wxcWttrvx7GVgLdAEPWWvbh3qxVCpNe/sBV8GORFNTQ1l+TrVQPr2nnHpL+fSecuot5dMbp00Zw2kXn8SOjgQzp4wteU6bmwffXd3JdN5+IP8VorkCyhgzF/gbYBbQAhxljPmY60hFREREHDiqsY66mpivMTgpolYDFwAYYxYCL+Vd2wccBA5aa1PADkBrokRERCT0Iun0kEuY8u/Om0tmzdOVwHxgjLX2LmPMdcBVQA/wOvA5a23PEC+5E9jkQewiIiIipTYTaC50YdgiSkRERESO5P8mCyIiIiIVSEWUiIiIiAsqokRERERcUBElIiIi4oKKKBEREREXVESJiIiIuODk2JfAM8a8G/imtfbsAc9fCHydzLl+y621y3wIr+IMkc8bgc+S2esL4FprrS1zeBXFGFMDLCezo38d8A1r7a/yrquPjoCDfKqPjpAxJgYsAwyQAq601r6ed119dIQc5FT91AVjzFFkjpk7z1rbmve8b3204osoY8yXgSvInN2X/3wNcDtwevbaamPMw9ba7eWPsnIMls+s+cBnrLVryxtVRfs0sNtae4UxZiKwDvgVqI+6NGg+s9RHR+5CAGvtmcaYs4FvAxeB+mgRBs1plvrpCGX74g/JnJIy8Hnf+mgYpvNeBy4p8PwcYIO1dm92B/VVwOKyRlaZBssnwGnATcaYVcaYm8oYUyV7ALgl73Ey72v10ZEbKp+gPjpi1tpfAtdkH84E
2vIuq4+6MExOQf3UjW8BPwC2DXje1z5a8UWUtfZBoLfApbFkzvbL6QDGlSWoCjZEPgF+BlwHnAucZYz5UNkCq1DW2k5rbYcxphH4BfC1vMvqoyM0TD5BfdQVa23SGPOfwPfI5DVHfdSlIXIK6qcjYoz5W2CntfbJApd97aMVX0QNYT/QmPe4EWj3KZaKZ4yJAN+x1u7KVvuPAqf6HFZFMMYcA/wOuNda+9O8S+qjLgyWT/XR4lhr/wdwArDMGDM6+7T6aBEK5VT91JWrgPOMMb8HTgH+yxgzJXvN1z5a8WuihrAemG2MmQB0AkvIDAeKO2OBl40xc8jMO59LZoGvDMEYMxl4CrjBWvvbAZfVR0domHyqj7pgjLkCONpa++/AAaCPzGJoUB91ZZicqp+OkLV2Se7rbCF1Xd6aJ1/7aOiKKGPM5cAYa+1d2TsgniQz4rbcWrvV3+gqz4B83kxmBCAB/NZa+5i/0VWEm4HxwC3GmNxanmXAaPVRV4bLp/royD0E/MgY80egBvgicIkxRp+j7g2XU/XTIgXld30knU6X62eJiIiIhEaY10SJiIiIlEzZp/P6+vrSqVR1j37FYhGqPQduKG/uKG/uKG/uKG/uKG/ulCNvNTWxXUBzoWuOiigvdwRPpdK0tx9w8mNDq6mpoepz4Iby5o7y5o7y5o7y5o7y5k458tbc3LhpsGvDFlHaEVxERJzoTCTZt7uL/fu7/Q6lokSjMG7cKL/DEBecjETldrC+d8Dzh3YJBTDG5HYJfcDTCEVEJPB2d/Xwif9cS/vBwfbqlaFcdUYLn180w+8wZISGLaKstQ8aY1oKXHK1S2gsFqGpqcFxgGEUi0WrPgduKG/uKG/uKG8j839XbaQzkeTfLn4ndTHdszQSv2lt47/WbOKT7zqGlomjh/8Hcojf79NiFpa72iVUa6I09+2W8uaO8uaO8ubcW3sP8v+ef4uPnDyFj592tPI2Qu9sbuCPr+3im4+18u8XzvE7nIpSpjVRg14r5s+FQ7uEGmNqyewS+kwRryciIhXoztUbqYlG+OyimX6HUpEmjq7lqjNa+M2rO3lle4ff4cgIjLiIMsZcboy5xlrbC+R2CX0G7WQrIlJ11rd18Gu7k08tOJpJo2v9DqdiXXXmLMaPquGOlW+iTbArh6PpPGvtRmBh9uuf5j3/MPBwSSITEZHAu+OPb9I0qoZPLzja71AqWmN9nM8unMG3fvc6z27ay8KWCX6HJA5o9Z+IiLjy7Ma9PLe5nc8unMGYutAdxVp2l8ybyrRx9Xzvj2/Sp9GoiqAiSkRERqwvneZ7K99k2rh6Lpk71e9wQqEmFuX6M1t4dWcXT7Xu9DsccUBFlIiIjNivW3did3Ry3ZkzqY3rV4lXzjuxGXPUGO5c9SY9yT6/w5FhqOeLiMiI9Kb6uHP1RmY3j+YDJx7ldzihEo1EuGFxC9v2J3joxbf9DkeGoSJKRERGZMWLb7N1Xzd/t2QW0UjE73BC590zx3P6jCbuWbOZzkTS73BkCCqiRETEsa6eJHc/s5kFM5pYOHO83+GEUiQS4YbFs2g/2Mt9f9ridzgyBBVRIiLi2H1/2sLeg73csHgWEY1Clcw7pjTyvhOauW/tFnZ19fgdjgxCRZSIiDiyu6uHn/xpC+87YRInTRn8KAzxxufPaqEnleaeZzb5HYoMQkWUiIg4cs+azfQk+/j8WbP8DqUqzBg/io+cPIUVL23nrb0H/Q5HClARJSIiw9rSfpCHXnybi+dOZcb4UX6HUzU+u2gmtbEId67e6HcoUoCKKBERGdadqzKHDF+tQ4bLatLoWj512tH82u7krzqcOHBURImIyJDWt3XwlN3J5Tpk2BefWnA0TaNq+J4OJw4cFVEiIjKkO/74JuPq41yhQ4Z9MaYuczjxnza38+ymvX6HI3lURImIyKAOHTK8aKYOGfbRJXN1OHEQqYgSEZGC+tJp7lj5JtPG1nGpDhn2VW08yuezhxP/WocT
B4aKKBERKeg3dietOzq57qwWHTIcAO8/sZkTmkezdPVGelM6nDgI9K4QEZEj9Kb6WLpKhwwHSTQS4YYls9i2r5uH/qLDiYNARZSIiBwhd8jwDYt1yHCQLJw5ngUzmrhbhxMHgoooERE5zKFDho8Zx6IWHTIcJDqcOFhURImIyGF++qetmUOGlxyrQ4YD6KS8w4l363BiX6mIEhGRQ3TIcGU4dDjxms1+h1LVVESJiMghy9dsJpFMcd2ZLX6HIkOYMX4UF588hYdefFuHE/tIRZSIiACZQ4YfzB4yPHNCg9/hyDCuXjSTmqgOJ/aTiigREQHgB6uzhwwvnOF3KOLApNG1fGpB5nDi9W06nNgPKqJERITWtg6ebM0eMjymzu9wxKFPZw8nvuOPb/odSlVSESUiItyxUocMV6Lc4cTPbW7n2Y06nLjcVESJiFS5Zzfu5dlN7Vy1cIYOGa5Al8ydyrSxdXxvpQ4nLjcVUSIiVSx3yPDUsXV8dN40v8MRF2rjUa47qwW7o1OHE5fZsH9yGGOiwFJgHpAArrbWbsi7fiPwWSDXctdaa20JYhUREY/lDhn+l/ONDhmuYB848SjufX4Ld67eyLknTKImprYsBydZvhiot9YuAr4K3Dbg+nzgM9bas7P/UwElIlIBelN93Lk6c8jwB+fokOFKFo1E+Lsls9i6r5sVL+pw4nJxUkSdBTwBYK1dAywYcP004CZjzCpjzE0exyciIiWy4sXtbGnv5gs6ZDgUFs4cz4JjxnH3M5vp6tHhxOXgZAXhWGBf3uOUMSZurc210M+A7wP7gRXGmA9Zax8Z7MVisQhNTdW9iVssFq36HLihvLmjvLkT9rx1JpIsf3Yz7541gQtOme7ZGXlhz1upeJW3my54B5f+8BkefLmN/3nubA8iCza/+5uTImo/kH+AUjRXQBljIsB3rLX7so8fBU4FBi2iUqk07e0H3EccAk1NDVWfAzeUN3eUN3fCnrdlT29id1cPty2awb593h0bEva8lYpXeZsxpob3nTCJu1e9yd+YZiaOrvUguuAqR39rbh78DEkn03mrgQsAjDELgZfyro0FXjbGjMkWVOcCa92HKiIipbbnQOaQ4feeMImTpo71Oxzx2OfPmkVPso/lOpy45JwUUSuAbmPM08DtwD8YYy43xlyTHYG6GfgdsBJ4xVr7WOnCFRGRYuUOGf68DhkOpRnjR3Hx3Kk8+OLbbGnX4cSlNOx0nrW2D7huwNOtedfvBe71OC4RESmBLe0HefAvb3PRyTpkOMyuXjSTR19p485VG/m3D83xO5zQ0kYSIiJV5AerNxKPRvjcIh0yHGaTRtdy+YKjeUqHE5eUiigRkSpx6JDh06brkOEqcMWCoxlXH9fhxCWkIkpEpEp8f+XGzCHDpx/jdyhSBmPq4nx20UwdTlxCKqJERKrAs5v2smbTXh0yXGUuzR5OfIcOJy4JFVEiIiHXl07zfR0yXJVq41GuPbOF1h2d/MbqcGKvqYgSEQm539idrG/r5LozW3TIcBX64JyjmN08mqWrNtKb6vM7nFDRu0lEJMTyDxn+wIk6ZLgaRSMRblisw4lLQUWUiEiI/fKl/kOGY1EdMlytFrVkDie+Z40OJ/aSiigRkZA60JPi7mc2cdox4zijZbzf4YiPIpEINyw5lj0Hevnpn7b6HU5oqIgSEQmp+9ZuYc+BXm5YPItIRKNQ1e6kKY2894RJ/ORPW9jd1eN3OKGgIkpEJIT2HOjhJ89v4dzZk3inDhmWrM+f2UIimdLhxB5RESUiEkKHDhk+q8XvUCRAZk5o4OK5U3lIhxN7QkWUiEjI5B8y3KJDhmWAqxfOIB6N8IPVG/0OpeKpiBIRCRkdMixDmTSmjssXHM2TrTtp1eHERVERJSISIratkydbd/JJHTIsQzh0OPFKHU5cDBVRIiIhcsfKNxlXH+czOmRYhjCmLs5VC2fw7KZ2nt2kw4ndUhElIhISz+mQYRmBj86bxtSx
dXxfhxO7piJKRCQE+tJp7tAhwzICtfEo153Zwvo2HU7sloooEZEQ+O2ru1jf1sm1Z+iQYXEudzjxnat1OLEbeqeJiFS4ZKqPpave5PhJo/ngHB0yLM5FIxG+sHgWW9q7WfHidr/DqTgqokREKtyK7CHDN+iQYXHhjJbxnHbMOO5Zs4kDPSm/w6koKqJERCpY7pDh+UeP44xZOmRYRi4SifB3i2ex50Av963d4nc4FUVFlIhIBftp9pDhv1uiQ4bFvZOmjs0cTvz8FvYc0OHETqmIEhGpUHsP9HDv81s4R4cMiwd0OPHIqYgSEalQ92QPGb5ehwyLB2ZOaOCik6fy4F90OLFTKqJERCpQ7pDhD588RYcMi2c+t0iHE4+EiigRkQr0w6c3EYtG+NyimX6HIiEyaUwdl582nSdbd2LbOv0OJ/BURImIVBjb1skT63dw+WnTadYhw+KxK04/RocTOzTs4UrGmCiwFJgHJICrrbUb8q5fCHwdSALLrbXLShSriIgAd6zSIcNSOrnDiW///Ru8f+kztB/sZXJjHdcvbuH8OZP9Di9QnIxEXQzUW2sXAV8FbstdMMbUALcD7wfeA1xjjJlSikBFRCR7yPDGvVz5bh0yLKXTWBcDYO/BXtLA9o4Etz71Go+vb/M3sIBx8g48C3gCwFq7xhizIO/aHGCDtXYvgDFmFbAYeMDrQJ3qSfbx/VVvsrsruPtc1NbG6elJ+h1GxVHe3FHe3Alq3l56u4MpjXV89BQdMiylc9fTR25z0J3s49anXmP1G3t8iOhIkUiEa99zHEePrvEtBidF1FhgX97jlDEmbq1NFrjWAYwb6sVisQhNTaW7k6QrkcTu7GJXZ3CLKCJA2u8gKpDy5o7y5k5A8zaqNsZNHzyRyZPG+B1KQbFYtKSf8WEVtLy1dSQKPt+d7KN1R1eZoyksGoFdXT28c/qQZUdJOSmi9gONeY+j2QKq0LVGoH2oF0ul0rS3HxhRkCP1g4/NLenrF6upqaHkOQgj5c0d5c2doOctqLEFPW9BFbS8TW6sY3uBQmpKYx2/uHJBgX/hj3Lkrbm5cdBrTtZErQYuADDGLAReyru2HphtjJlgjKkFlgDPuA9VRERE/Hb94hbq44eXCPXxKNcvbvEnoIByMhK1AjjPGPM0mQHuK40xlwNjrLV3GWNuBJ4kU5Att9ZuLV24IiIiUmq5u/CWrtxIW0dCd+cNIpJOl3fSv7c3lQ7SkKUfgjZsWymUN3eUN3eUN3eUN3eUN3fKNJ23Fig4h6nNNkVERERcUBElIiIi4kLZp/OAncCmcv9QERERERdmAs2FLvhRRImIiIhUPE3niYiIiLigIkpERETEBRVRIiIiIi6oiBIRERFxQUWUiIiIiAsqokRERERccHJ2nnjAGFMDLAdagDrgG9baX/kaVAUxxhwFrAXOs9a2+h1PJTDG3AR8GKgFllpr7/E5pMDLvk//k8z7NAV8Tv1taMaYdwPftNaebYw5HvgxkAZeBr5gre3zM76gGpC3U4DvkelzCeAz1to2XwMMqPy85T13OfB31tpF5Y5HI1Hl82lgt7V2MXA+cIfP8VSM7C+2HwIH/Y6lUhhjzgbOAM4E3gMc42tAleMCIG6tPQP4V+DffI4n0IwxXwbuBuqzT30b+Fr2cy4CXORXbEFWIG//l0wRcDbwEPAVn0ILtAJ5I1uAfpZMfys7FVHl8wBwS97jpF+BVKBvAT8AtvkdSAX5APASsAJ4GHjE33AqxqtA3BgTBcYCvT7HE3SvA5fkPT4N+EP268eB95U9osowMG+fsNa+kP06DnSXP6SKcFjejDETgf8AvuhXQCqiysRa22mt7TDGNAK/AL7md0yVwBjzt8BOa+2TfsdSYSaROXX8Y8B1wH3GGF/+UqswnWSm8lqBZcB3fY0m4Ky1D3J4oRmx1uaOwegAxpU/quAbmDdr7dsAxpgzgBuA230KLdDy82aMiQH3AP9Apq/5QkVUGRljjgF+
B9xrrf2p3/FUiKuA84wxvwdOAf7LGDPF35Aqwm7gSWttj7XWkvnLtuDZT3KYfyCTtxOAecB/GmPqGQn1KgAAIABJREFUh/k30i9//VMj0O5XIJXGGHMZmRH3v7HW7vQ7ngpwGjAbuBP4GfAOY8x3yh2EFpaXiTFmMvAUcIO19rd+x1MprLVLcl9nC6nrrLXb/YuoYqwC/t4Y821gKjCaTGElQ9tL/wjBHqAGiPkXTsVZZ4w521r7ezJrP3/nczwVwRjzaeBa4Gxr7R6/46kE1trngJMAjDEtwM+stWWf1lMRVT43A+OBW4wxubVR51trtVhaPGetfcQYswR4jsyI8xestSmfw6oEtwPLjTErydzVeLO1tsvnmCrJl4BlxphaYD2ZpQsyhOy01HeBzcBDxhiAP1hr/9nXwMSRSDqdHv67REREROQwZR+J6uvrS6dSpSncYrEIpXpt8ZbaqnKorSqH2qqyqL0qQ01NbBeDrCktexGVSqVpbz9Qktduamoo2WuLt9RWlUNtVTnUVpVF7VUZmpsbNw12TXfniYiIiLigIkpERETEBUfTeYXOqsk+fyHwdTK7by+31i7zPMIiPb6+jaUrN9LWkWByYx3XL27h/DmT/Q5LAkB9Q0SkNKrl83XYIip7Vs0VQNeA52vI3A58evbaamPMw0Haw+fx9W3c+tRrdCcz+79t70hw61OvAYSyMcU59Q0RkdKops9XJyNRubNq7h3w/Bxgg7V2L4AxZhWwmMwZcb7p7k3xL0+8yp4DPbz09n56B9z50J3s438/+Sq/fDEwtV6ovfeEZj5+6rSC17p7U9zyWCv7u8t/jKD6hnPxeJRksm/4bxTfFdNWo2tj/OsFJzKmLk46neY/frOBjXsyi54vmz+dc2dP8jJUALbv7+bWX79GooT964J3HMVFJ08F4JXtHfy6dSd//55ZRCIROhNJvv5YK109KeLRCF88+1hmN48p+DovbNnHD5/eSJ+HN9OF9b3l5PM1Fo1ww+JZvGNKox8hembYIspa+2B2N9CBxgL78h47OicpFovQ1NTgOMCRiMWiNDU1UFsbI94TPaIRc3pTaeJxLQcrNdvWSfLVnVxzzvFHXIvFouxJpvn9ht3MPmoM4xtqyhqb+oZzkUhEOakQbttqf3eSP2/Zx7aDSd41eSzdvSkeevFtjhk/ip2dCf745h4uOX2G5/Gufmsfz2zcy9zp46iv8b6PtW7v4KlXd/E/Fh8HwJrnt3Df2i185YI5jKqNYd/czco39mAmj8G2dfLyzgOcPvuogq/1/HNvsXbLPk6fOd6z+ML63hru8zUNPL9xLy9s7+SMEyt7ZKqYLQ72kzkbKcfROUml3uKguyvBv3zgBAAuvOtZtnckjvi+KY11fP/Sk0sSg/T7x1++wrb93QXbu6mpgT3Z579wZgtnHjuhrLGpbzin27Arh9u2emV7B3973zradnfR3lTP7q4eAD45fzoP/mUbXQd7S9IH2vd3A/AvHzyBo5tGef76N654mR2dPYdi3539eW+17eeoxjq27cqsUvnKucdz1f97gX0dhT+vADq6emioiXn6+RDW99Zwn6/pdJp3fXsl+7sSFfHf39w8+GhZMSXwemC2MWZCdov/JcAzRbye565f3EL9gCq/Ph7l+sUt/gRUZWrjUXqGGKrODeHXxiPlCukQ9Q2RfqNrM8cDdvWkDvv/0bUxamNRelKlmXJKZF+3Nlaa0ZjRdXG6evqXC+S+3p/I/H9HdinBxNG1AEP+d/ak+koWZ9gM9/kaiUSoG+b3Q6UY8UiUMeZyYIy19i5jzI3Ak2SKseXW2q1eB1iM3AK2arhDIIhqYxF6h/hQ6i3xB+hQ1DdE+vUXUcnD/r+/iCrNrtq9yRIXUbUxuhL9R0bmvs4VT7liamx9nJpYZMj/zv+fvXePl+yq6n2/61GPXfvRu7uz052EkAQI0yaBSMIjQBqiHPAEjeH6uvdwfAV5iR71oKIG0HuvgNePBDwo
6AWNaJQjIuR4AgLxICIEEBKiBkgm6YSQF93p1+79rseqdf5Ya1WtXXvVY1fVelWN7+eTT/Zea/eqWXPONeeYY/zmmLVGk+IEht7iYJDx1avvKTGitNYPAlf6P38wdP1W4NZYSjYmrjl0QCbGlCjaJtUeg1K14bb+Lg2kbwiCx2zRmwo2fA/URssTZXvvcUweg2ASjWsMmC1abNRDRlTgidoKPFF1LCNkLPb4njWnSUmMqIHpN77G6eFMEukRQmz0G5QCT1RBXOSCkCozBRMDWPONpzXfYzNb8oyLXh7lUWgZUVY8If3Zok210WyVPwhTrlbrgGdMzZVsDMPoO6nXHJdCTOWcRuL0cCaJzF5CbPQflLx7JTGiBCFVDMNgtmSxXu0M58XsiWo0MQ1vu3scdNN6tTxR1QYLZc8L10/DWWuIJmqc9KvvvCA9QoiNgv+SuG70aiMYmGV1JwjpM1u0uwjL49OuVBsuBcvEMGIyokqdWq8OTdRWg/myl16l3/esirB8rPSLVOQF6RFCbJQsLx+I0yU7XeBiF52BIKTPbNFqG1HVDmF5TJNdPWadUaD1CgTlwfdaDf1/oRTyRPUIL9VFWD5WvPoWI0oQuhJ4mKpdXpS2J0q6oSCkzWzRDoXzHCzT24bez7gYharTjPX9D4fzGk23dQzJyjZPlG9EDSAsF0/U+IjTw5kk0iOE2AhWmPVG9+y14b8TBCE9wjvZNmoOs0WrJbiOS1hed5qUYgznB0bURs1hI5QvajWUJ2ohbET1yxMlY9XY8IxWEZYLQleCVVtXT5TTxADsmESlgiAMjicsDzRRjZYBErewPE7DpBXOqzVaoUrwPFCu67KyVWfeD+cVBhKWy1g1LiScJwh9CAbHbqvYQGMQl6hUEITB8TRR7XBeYIAULYNG06XZZYPIKHhpA2I0onxh+VrNaRmIluF5oDbqDo5LyxNVGiDFgYTzxofkiRKEPrQ8UV1Wd6IxEITsEN6dt+aH86D9HschLq81khKWN1oG4tJciZVqo7VDb36bsLyPJ0rCeWNDUhwIQh+CFWa3ganmNCW9gSBkhGB3XtN1Wa82Wl6ctkc5Dk9UvMLyIInoes1pGYjnLJRY3aq3xOVtTZQhwvIE6XcsWF6QHiHERsk/WLjbwBT3KlQQhMGZLbWPftkezuutbRyFmtOMNdluK4loyIg6sFCm5ricWK8BMFcKC8t7nJ0nwvKxUrTi09olifQIITb6eaKCRHuCIKRPZdtONqf1e5zhvGojfm90pWCxUWu00jecs1AC4NEzWwADZSxvui51xxVh+RgRYbkg9CHwMnVb3cWdaE8QhMGZC+VU6tydB90XQ6OQxBgwW7K3eaIOLpQBeHTZM6LmB0hxEIQyJZw3PuTsPEHoQ78VrByjIAjZIQjfrWzV2aw3mSu2PTQQn7A87hDZXNFL3RAIyw/OB56oTQAWSt6xL4UeRlTw3SWcNz6KtonTdLueaJEXpEcIsREYSF1THDiSd0UQskLgeTq+5mmFWsJy/x2NQwQcd4oDCHYdenmiKgWLPTOe0fTomS0M2t+zZBvUHTfyrM/AuJJF3/joNz/kBekRQmwEq7auKQ5ky7AgZIbAmHh8rer93qGJyqOwHLzvtRaI5UtW66y8x85sMV+2Mf08dW0NpxhRSdBvfsgL0iOE2AhWsN1THIiwXBCyQhDOe3y1tu33lscghiM6ao14UxyAn7qh2mC96nmiAg3Ues1p5YiCkIYzYlKXcN74idPDmSTSI4TYKPYRlkuKA0HIDpVWOK+67feWxyAuT5Qd8+68os1G3RfLl+xthlOwMw9CGs6I79nyRMl4NTbi9HAmifQIITb6CcvjTrQnCMLgBOG7buG8cXsMvKNkSMgT5bBW9bKwW6bR+m5hg6q3ERXszhMN57iI08OZJDKDCbExSMbyuPUQgiAMRsEyKdkmj6/6RlRp++68cWtXgsVV7CkOihYucHK92jKeAg/UNk/UIOE8Ga/GRpwe
ziSRHiHEhmUa2Gb3oxREWC4I2WK2aPG4vztvrhjv7rykxNqBMfj4Wq31c+CBmt8Wzuuu4RRh+fjpd0B9XpAeIcRKrwR2cnaeIGSL2aJFw8/bM1vs9ESNN+wSLK4KCeSJAi98ONfhiZr3c0RBbw2nCMvHT2C0yu48QehBr6MURFguCNkiMJyAnce+xOSJij3FQcR3mi97xlM4nFfooeEUT9T4iatfJY30CCFWipYR+ZI0nCZOAqJSQRAGJzAyyraJZXqegriE5cG4EPvZef53grZBtRARzisNYkTJom9stDVoIiwXhK54h0x2T14nwnJByA6B8Hq2FPbQxBN2SVJY3vlzYDwthL9njzMC28JykR+MC/FECcIAFKzocF5SeghBEAYnMJ7ChodhGBQtIwZPlLe4ij3FQcna8XNLExXlieqV4kDGq7Ehx74IwgCUugjLg1VtSVZ2gpAZWp6okBEFnvGQX0+UvePnYHfedk1Uj915kuJg7MixL4IwAF09US09hHRBQcgKgZERDueBZzzUu5w8MCxJjQFR4bxDB+Y4d6HEeXvKrXs9j30RYfnYmZRjX+z+fyIIw+OdjB7hiaonswoVBGFw5vxw11ynJ8oyx54UseWJitkwCZKIVhtN5nwj8ZJzFvi7Vz93298NkuJAUrKMj0nxRPU1opRSJvBe4DKgCrxKa30kdP8NwM8Ax/1Lr9Va6xjKKuSQom2yvNnYcV1WdoKQPSoFz3iqRITz6uMO5yW4461SsKg2mju+V5hex1TVHJeiZWAYYkSNi1JLE5Xv3XmDeKJeDpS11s9TSl0J3AhcF7p/OfCTWus74yigkG+8MEB3TZQIywUhOwTC67COCHonzR2WpFIcgPe9Tm/Wt4nMO+l3ALGIyseLZRoY5P/Yl0GMqKuATwJorb+klHpWx/0rgN9QSh0EPq61/p1eD7Msg8XFylCF7YdlmbE9WxiO2ZkCjdObO9qlccY7n2v/nhlps4wj71V+GLWtzt47C8D+hfK251RKNk1jvGO35RtqS/tmWVwo9/nr0ViYKcLyFuctzVMqRBtSrut5REzb2vk9LZNS1PURmfZ3q1QwMXJeB4MYUQvAmdDvjlLK1loHMZq/Bt4DrAC3KKV+QGv9sW4PcxyX5eWNoQvci8XFSmzPFoak6bJZc3a0y1bN6z7VzZq0WcaR9yo/jNxWde+9tJrNbc8xcdnYqo+1H5zxDzreXK+y3IzXG1G2vHM8N9a22OwRkitaBivr1R3fc22jRsE0xv4eTPu7VTBN1jayPwcsLc13vTeIf3IFCD/BDAwopZQB/L7W+oTWugZ8HHjmCGUVJoy+KQ7ERS4ImaGVJypid15cZ+clMQbMFi1mi1ZfTVPB6pIcWA5Lj4U4UmckzSC94nbgZQC+Juru0L0F4GtKqTnfoPpeQLRRQotCl2Nf2rtdZGAShKywr+KdKbe/Uth2vWhHaxtHIck0J/tni+ybLfb9u1KXsz5rTlM2wcRAHElck2aQcN4twEuUUl8ADOB6pdQrgDmt9fuUUjcAn8HbufdprfXfx1dcIW+U7Oj8MuKJEoTscc5CmT//z89EnT237XosKQ6cJpYBthm/sPz1V13Ies3p+3eFLp7zmtOU9AYxEIeHM2n6GlFa6ybwuo7L94bu3wzcPOZyCROC95I0cV13mytdUhwIQjZ52sGd+o9YPFENN7EQ2d5Kkb0DaJe7e6JcWfDFQBz9KmmkVwixEgySjeb21UZVjlEQhNxQtIxI42IUshgi65bKodbIXlkngTg8nEkjvUKIlWDg6RQPts6iktWdIGSeYhfB9ShkMfdSNw1nPYNlnQTEEyUIfSh0SWDX9kSJzkAQsk6xS5hrFGqNZuY2lnQL51XFExULcXg4k0Z6hRArJds/GT3CE2UaXtZaQRCyTRwZy+tOM/Zz83ZLtxQHdRGWx0IcHs6kyVYPFiaOtieqUxPlULBMOYtKEHJA0TJpNF2c5vgmvGoje4ZJL0+UCMvH
TxwezqSRXiHESsmODufVZFAShNwQ6IHGqV+pOdkbA7qnOHAzF3qcBERYLgh96HYyehZ35giCEE2xy2JoFGpOcikOBqXYQ1ieNYNvEhBhuSD0ITCUOl+Uar0ponJByAnBuzrO0EsWheVFq3s4L2tlnQS61XeekF4hxEqw0tyR4kC2DAtCbih20TaOQi2DwvKivVPo3HRdGk03c2WdBOLYsJA00iuEWGmtYCNSHMjKThDyQbew/Chk0RMVJSyXnHbxUbQlxYEg9KStpdi+uhNhuSDkhzg0UZ7OKFsh/ShheXD2Z9Z2Ek4CRcvEcXeeaJEnZBYTYqXQZQUbpDgQBCH7FLskzR2FagZ3vJX8VA5Ntz2pB7vHZNE3frppZvOE9AohVrqmOMigHkIQhGiKdnRYfhSyuOOtECGgDyb4rBl8k0DLw5njkJ70CiFWumkpqnURlgtCXohDE5XFo1SiwpbBphhZ9I2fOMLESSO9QoiVbmEAbwAVjYEg5IH2LtvxaFdc1/WE5RlbSEXtQgwMx6yVdRII5oDO3dt5QnqFECvd3LWS4kAQ8sO4tStO08Ule96dqPEq+M5ZK+sk0O5XIiwXhEgCjUHnS5LF7c2CIEQzbmF5taUzypY3Oup7ZrWsk0AcYeKkkVlMiBXTMChYxo7zkWoNEZYLQl4YtwC47ocFsyYsj/REZbSsk4BoogRhAKJS+0uyTUHID92S5g5LNaM73ko9PFEiPxg/caTOSBrpFULsRKX2r2Uw0Z4gCNF0S5o7LPWM5l4qRBiLkuIgPsQTJQgDULC2p/ZvNF2cZvYS7QmCEM24tSvBbqysjQGliHCepDiIjzgOtk4a6RVC7JRsM3Jll7VVqCAI0dimgcH4PAbBGJC1PFGFiBQHdRGWx8a4PZxpkK0eLEwknSejByu7rA2ggiBEYxiG9x6P2ROVtZB+lLC8KsLy2JDdeYIwAEXL3JZfRpLXCUL+iNI2DkstozqjKKFzVss6CQT13bl7O09IrxBip2iZ2zLS1iR5nSDkjqI9TiMqm96dKI2OyA/iI/D81cUTJQjdKXSEAWqiMRCE3FHs2CAyCrWMCst7nZ1nmzJejRtJcSAIA1DqCANI8jpByB9eOG/MKQ6yZkR1EZaXbBPDECNq3EiKA0EYgIJldDlGQbqfIOSFOITlhawJyyOEzl5i4GyVc1KwTQPTyLew3O73B0opE3gvcBlQBV6ltT4Sun8t8JtAA7hJa/3+mMoq5JSSbW47O080BoKQP8YpLM+qJyo62aYrO4ljZJwezjToa0QBLwfKWuvnKaWuBG4ErgNQShWAdwHPBtaB25VSt2qtj8ZVYCF/FC2TtWqDOx9eBuAbR1db1wVByAdF2+Tkeq31Ho/C/Sc2Ws/MEoZhULJNHj692fqe31nZkgVfjJRsk0eWN4fuV884dyHVqMYgRtRVwCcBtNZfUko9K3TvEHBEa30aQCn1eeAw8OFuD7Msg8XFyvAl7oFlmbE9WxiesxdnOLVR53V/8+/brp9/YF7aKwfIe5Uf4myrs+bL3PXIsR3v8bAUbZMDZ81nzkBZrBS4TR/nNn28de2ScxdiqVd5t2DvbJF/OnKSfzpycqh//8bvU7z6qovGXKrBGcSIWgDOhH53lFK21roRcW8V2NPrYY7jsry8seuCDsLiYiW2ZwvD81NXnMezz1vYdu2cs+ZYtAxprxwg71V+iLOtfu17nsQPP/3A2J531myRzbUtNsf2xPHw/v/zMh47s7Xt2hP3zsRSr/JuwR/80KU82lHfg2IY8PRzFmKvw6Wl+a73BjGiVoDwE0zfgIq6Nw+M7usVJoqZgsUV5y9uuyaDhyDkiz0zhR3v8SRyzkKZcxbKaRdjaji4UOZgjut7ED/q7cDLAHxN1N2he/cAFyul9imlisALgS+OvZSCIAiCIAgZw3Dd3qr40O68ZwAGcD1wOTCntX5faHeeibc77z19PvM48O1RCy4IgiAIgpAAFwBLUTf6GlGCIAiC
IAjCTrK1LUIQBEEQBCEniBElCIIgCIIwBGJECYIgCIIgDIEYUYIgCIIgCEMgRpQgCIIgCMIQiBElCIIgCIIwBINkLM88oVxWlwFV4FVa6yPplkoIo5S6i/YRQd8C3gZ8AHCBrwE/p7UezxHxwlAopZ4L/K7W+mql1FOIaB+l1KuB1wIN4K1a64+lVuAppqOtLgduBe7zb/+R1vpD0lbpopQqADcBFwIl4K3AN5D3aqKYFE/Uy4Gy1vp5wK8DN6ZcHiGEUqoMoLW+2v/veuCdwJu11ofxkrhel2YZpx2l1BuBPwGC8xd2tI9S6iDwC8ALgO8DfkcpVUqjvNNMRFtdDrwz9H59SNoqE/w4cNJ/h64B/hB5ryaOifBEAVcBnwTQWn9JKfWslMsjbOcyoKKUug2vz90AXAF81r//CeClwC3pFE8A7gd+CLjZ/z2qfRzgdq11FagqpY7gnWTwlYTLOu1EtZVSSl2H5436JeA5SFulzYeBvw393kDeq4ljUjxRC7RDRQCOUmpSDMRJYAN4B94q63XAXwGG1jpIl78K7EmpbAKgtf4IUA9dimqfzvdM2i0FItrqy8Cvaq1fCDwA/BbSVqmjtV7TWq8qpebxjKk3I+/VxDEpRtQKMB/63dRaN9IqjLCDbwJ/qbV2tdbfBE4CB0L354HlVEomdCOsTwvap/M9k3bLBrdore8MfgaeibRVJlBKnQ98BrhZa/1B5L2aOCbFiLodeBmAUupK4O50iyN08Ep8nZpS6ly8lddtSqmr/fvXAJ9Lp2hCF+6KaJ8vA4eVUmWl1B7gEJ44VkiXTymlnuP//GLgTqStUkcpdQC4Dfg1rfVN/mV5ryaMSQl53QK8RCn1BTyx3vUpl0fYzp8CH1BKfR5vV8orgRPA+5VSReAetmsHhPT5ZTraR2vtKKXejTfwm8CbtNZbaRZSAOBngT9UStWAo8BrtNYr0lapcwOwF3iLUuot/rVfBN4t79XkYLiu2/+vBEEQBEEQhG1MSjhPEARBEAQhURIP5zWbTddx4vF+WZZBXM8Wxou0VX6QtsoP0lb5QtorHxQK1glgKepe4kaU47gsL2/E8uzFxUpszxbGi7RVfpC2yg/SVvlC2isfLC3Nf7vbPQnnCYIgCIIgDMGk7M4ThF3ziXuO8d7PPcix1SoH5ku8/vCFXHPoQP9/KAiCIPRkWsbXgYyo8GGXHdevBX4TL539TVrr94+9hIIQA5+45xhvv+0+thpe7rujq1Xefpt3fuskvuiCIAhJMU3ja18jyj/s8ieA9Y7rBeBdwLP9e7crpW7VWh+No6Dj5MR6jTsekoSwaVKpFNnYqKX2+Tf+4/2tFzxgq9Hkxn+8HwMjpVJlk7TbatI5f7HMJecsANB0XT53/yk2685QzxqlrWaLFlc9aR+GkVz/bzhNPvfAKaod7+I4eca5C5y7xzurea3a4AvfOkUzpOWeKZi84En7sc3o7318rcqdD5+JvDcqk/puDTK+WqbB8y/ay2wx3wGxQUrfedhlwCHgiNb6NICfSPEw3qGLXbEsg8XFyhBF7Y9lmQM9+x2ffYAP3fFILGUQ8s2ZrQZv+ft70y6GMEVUihb/+ub/gGEY3PHt0/zK3309tbLc8rrncel5yR3b9o/6cd74P78R62e86KlL/MlPXAHAf//s/bzzf92342/+/KefzfOfvD/y37/900e45V8fi7WM00Ln+HrDNd/F9c+/ML0CjYG+RpTW+iNKqQsjbg11aGIWduedXKly3p4y/+2HLo2lHEJ/FhZmWFnZTO3zX/c3/8aJ9fqO62fNFvjjH7sshRJll7TbapL56L9/hw/e+SiPn1ynZJs8/PgqAO+47hIu3Dez6+cN21b68TXe9PF7efj4Kk+YLez63w/LsVNegOMPfvhSzlkoj/35b/uH+zixstWaF75zaoNKweIvfvyZADx4apNf+buvc+zUOsv7o+v71GqVC/fN8I7rLhl7+Sb13eo3vrou/OgH7uDE
mc1c7E5cWprvem8UP1puD02sOU1mixYX7IvHIyb0Z3GxwrKdXtjsF170pG0xe4CybfILL3qS9IsO0m6rSea8Pd7EvV5rULKLrNe8c9OfslRp3dsNw7ZV3Y9vrVeHCyMOS81//y7cV+FgDEbU/kqRIyfWWr+vVRvMldpjfxDVq/UIJ1adJnMlO5ZxYVLfrUHGV9s0etZ7XhjFiLoHuFgptQ9YA14IvGMspYqZWqNJyZbsDtNMIG6cht0jQnaZK1kArFUd9lW8/wOJ60TmikE5Gol+brXhmTFxjcezJatVpwDrNYfZUrtug8+tOt0n81qjSdGS+WI3DDK+lmyTWo96zwu7flOVUq8A5rTW71NKvQH4FF6+qZu01o+Ou4BxUHWaFMWImnquOXRAjCYhVQJjKfBABf8PjJrky5GwJ8qfROMaj+eKdqtOwavfuZCBGhhHvTwiNd8TJeyOfuNr0TJj3VCQFAP1DK31g8CV/s8fDF2/Fbg1lpLFSK3RZKEsL4UgCOnS9kQ1/P87lGwTO2HPRyUlT1RgvJRi+r6zJYvNepNG08U2DdaqDvPlnZ6oXh6RaqPJ/oosusdN0TYnIpw3lT2j5oh7VhCE9Jn1jZdAi7Rea7SuJYllGlQKVuKeqKrTxPA/Pw6CutwIefrCXr6CPw/08ojUGs3W3wnjo2gZExHOm8qeUWtIOE8QhPQJwkSB8bJedVILHc2VrG2hrySo+2NxXLmpOut3rbpdE1W0vM+t95jM606T0gSKv9OmaE9GOG8qLYmaaKIEQcgAsx1htLWUPFFeWextIuwkqDnxbvLpFMx3evoMw6BoGS2BexRVx5X5IgaKlknd6V7veWEqe0a10YwtBi8IgjAonYLu9Q5PSaJlScETVY1551urfqsOTtNls97cJiwHX5sju/MSp2SbPXdF5oWp7BniiRIEIQsUbZOiZYR25zmJ78wL8HayJe+JinMsDoT76zWnVcezpe31W7R6C5zj9pZNK/3qPS9MZc+QlYUgCFngiXRJAAAgAElEQVQhHEZbqzZS9USlsTsvzqhA4IlaqzZaBmKnJ6qXR8R13di9ZdOK7M7LKa7rUnPclqBQEAQhTcKC7mnzRFUbTQoxjsVtT1SjZSDOdXiiCj08IoFmRyIX40fCeTlFXgpBELLErG+8uK7rCZ+nyBNVd9xYQ2Wzod15612ywZdss+vuvFYyUPFEjZ1exmuemLqeEbwUEuMWBCELBMbLZr1J000+W3mrHEUvMaXTTG7HVNynR5RtE8vYHs6L0kR122ofd0b1aaZkTcaxL1PXM4KXRVYWgiBkgSCM1hI+pxXO8702GwmG9OLWpxqGwWzJq99WOG8Xu/Pizqg+zYgmKqfIykIQhCwReKICcXlayTZbOasSTHOQxM632aLle6Kid+eVeoSVWotumS/GTlE8UfmkKisLQRAyxOwOT1RaRlQ7p1JSJLHzLajftS6aqF6Zs2XRHR8l2xBPVB6pycpCEIQMMVeyWK+2d4+lF85r72RLiiSO4JorWaz5RqppwExh++f18ohIOC8+iraJ40IjQQ1eHExdz5DdFoIgZInZoo3jwsn1OpBmOC/IqZSgJiqBw+BnizbrvrB8tmjvOKev2MMjEmzBjzMNw7QStHvevVFTZ0m03bPyUgiCkD6BB+jYahXYqdlJrhxBOoBkNVFJeKICYXlnjigIPFHR3pC6f6ae7OYeP2JE5ZSa7M4TBCFDBB6gx9d8IyrFFAdAormikjg9wssI3/ZEdVLqsTuvKpqo2AjqNO8JN6euZ1RlZSEIQoYIjJeWJyotYXnonLkkCE6PKMUcFZgttj1RUQZqzzxRsuiOjWAO7pboNC9MXc+Q3RaCIGSJIIx2bLXKTMHEMtORGlQKFgawlpARFYTQ4jZQ5ko21UaT5c1GpN4syFfkujtDepKcOT6Cdu9mwOaFqesZsrIQBCFLBN6Rx1erqXmhIEhM6e0UTIKkdkoH9Xt8rRrpiSrZJi7Ru8Qk
OXN8BO2e91xRU9czqrI7TxCEDBGE0c5sRQufEy1L0U7ME5XUWByu3yjRfqGHR0QW3fFREmF5PqlLnihBEDJE+BiSND1R0M5ZlQT1hKQV4frtPPIF2gZSlDZH5B/x0RKWixGVLyTGLQhClgiHmKbKE5VQIsuw9ynKExUI2yM9UWJExUbRz70l4bycEbwoBXHPCoKQAWzLpOxP0tPkiUpKExUWk0d6olranAhheaOJZYCdkth/kmnVu3ii8kXNaWKZhrwUgiBkhll/ok8rR1SrHP45c0mQlJcnbJhGeqJ6aHOqDVe8UDHRSrbZJdFpXpi63lFtNOUcJEEQMkVgPKV15Eu4HEkl20wsnBcyTKM8fb2SPiZxLM20UhJPVD6pNZpyDpIgCJliLiOeqLlS8p6ouMfjbeG8HrvzoibzJA5InlYkY3lOqTlNEZULgpApAuNpNgOeqGqjSSOBiS0wWuIej4tWW74ReexLr3CeeKJiY1LOzuv7xiqlTOC9wGVAFXiV1vpI6P4bgJ8BjvuXXqu11jGUdSzUHIlxC4KQLVrhvLQ1Ub4Rt1ZzWJyJd5xsZSyPeTw2DIPZouXliYo69qVH0sd6AgckTyulCUm2Ociy5+VAWWv9PKXUlcCNwHWh+5cDP6m1vjOOAo6bJA68FARB2A2tcF7KnqjAiFuvNVicKcT6WUkmspwr2X4y016786KE5aKhjQvbNDDIf56oQd7Yq4BPAmitv6SUelbH/SuA31BKHQQ+rrX+nV4PsyyDxcXKUIXth2WZfZ/dNAwqJTu2MgiDMUhbCdlA2ip+9s2XATiwrzJSXY/aVmfv9f6tUSzE3uaWH1pb2jfL4kI51s9amCnw6Jktzjt7nnJhuzdqyfeI2RHfuWkYVMrxzRfT/m4VbRPDtnJdB4MYUQvAmdDvjlLK1loHWzj+GngPsALcopT6Aa31x7o9zHFclpc3hi5wLxYXK32fvb5Vx4LYyiAMxiBtJWQDaav4KeBv8647I9X1qG1lNDxR+dGTa5w7E69XbHl1C4Ct9SrLzXi9ETO2iW0abK5tsWVsF7JvbdQAOL2yuaPuNrbq2JaZ6pw1yRQtk9X1aubrYGlpvuu9QfyUK0D4CWZgQCmlDOD3tdYntNY14OPAM0coa+zIbgtBELJGS1ieFU1UNf4dekkJy8Gr19mihWHs3AnYEpZLOC9xiraZe03UIL3jduBlAL4m6u7QvQXga0qpOd+g+l4g09ooyfshCELWODBfwjIN9lXi1SH1IzDiksgVVXWSOz3i7PkSZ8+XIu8V+hz7Iilx4qNkGZO/Ow+4BXiJUuoLgAFcr5R6BTCntX6fUuoG4DN4O/c+rbX++/iKOzo12W0hCELGePFTlzh0YJ69lWKq5VgoB56o+I2oun96hJXA6RE/f/giturR3rVS6wDinZmz644rKXFipGibVBv5zlje14jSWjeB13Vcvjd0/2bg5jGXKza83XmyshAEITtYpsH5e2fSLgYLfjjvzFYCnqgEQ2VzJbtrNnjLNDCN6KSPVdnNHStFazrCeRNFtSHJNgVBEKKwLZNKwWI1ASMqK/pUwzC8yVwylidOyY6u9zwxdb2j5riyshAEQejCfNlmJYFwnqdPzUZUoNtkLidcxEvBMuXYl7whKwtBEITuLJRtVjbrsX9OlqICRTt6MpdwXrwUbZO6GFH5Qs5CEgRB6M5C2WY1EU+Um8jOvEEoRITznKZLoymRizgpWWbuM5ZPVe9wmi5OU87OEwRB6MZ8yWYlIU1UVjxRpQiBc+AhkfkiPoqiicoXwUshydMEQRCiSc4TlZ2oQNRkXhMjKnamJdnmxBC4DeWlEARBiGa+VEjME5WVsThqq30ro3pGxO+TiITzcoasLARBEHqzZ8am2mjGPrllaedbyd6ZObsq80XsiCcqZ1QbEs4TBEHoxbyflHJ1K94delna+ebtztueObvmZ9LOShknkeIEHPsyVb1DPFGCIAi9CY5+iTtXVJaO4IpK
tpnkAcnTihdGdXHd/B79MlW9I3gpspLgTRAEIWu0jKjNmI2oDB3BFaWJSvKA5GklMKKjzi3MC1PVO2p+Q2Vl9SMIgpA15ssFIAlPVHZyMEXtzmvt5pb5IjaCus2zLmqqekfbEzVVX1sQBGFgFlqaqAQ8URkxUEoRAueqzBexE9RtnnfoTVXvqMrKQhAEoSfzCWmiqhnanVeM2Gpfk5Q4sVMUT1S+EE+UIAhCb4LdeXGen9cITo/IyFgctdW+JsmZY6cknqh8ISsLQRCE3limwVzJijVredb0RiXLpO64NEO7xCQ5c/wUAk+UGFH5oJU8TVYWgiAIXVmI+fy8wEDJys63gr9LMDyZt1LiZGQH4SQSeKLqEs7LB3KgpCAIQn8WyoVEPFFZGYujttrLbu74KdqegVoVIyofSMZyQRCE/syXbc7EmCcqa2NxEFYMT+aioY2foG4lnJcTRBMlCILQn4WyzWo1PmF51k6PiJrMZb6In5bx2pBkm7lAYtyCIAj9mY9ZE5U1L08pQuBcdZoULAPTkPkiLiTFQc6oNlyKloEhL4UgCEJXAk1UXGeatcJ5djbG4lbSx45wXlaMvElFwnk5o+Y0M7MbRBAEIasslG3qjstWTJNbLWPn0kVtta85YkTFTZTxmjemqofUGtnJkCsIgpBVWlnLYwrp1XwNTFbG40DgHg4rVTN0LM2k0toVKZ6ofCArC0EQhP7sKcd7fl4tYzn7orQ59QwdSzOpyAHEOSNLB14KgiBkldbRLzHt0MvazrdShDanKpqo2ClMwLEvdtoFiJtP3HOM937uQY6tVinaJoszhbSLJAiCkGkWgnBeTLmisnYYfNHeOZnXHFl0x41tGlimkWtPVF8jSillAu8FLgOqwKu01kdC968FfhNoADdprd8fU1l3zSfuOcbbb7uvJY6sNpo8vlrlE/cc45pDB1IunSAIQjZpaaJiylqetRQHQebsWsfuvJKkw4mdkmVOvCfq5UBZa/08pdSVwI3AdQBKqQLwLuDZwDpwu1LqVq310bgK3I9G0+XD//oYp9ZrfPhfH9uxu8QF3vu5B8WIEgRB6MKesuex/9Q9j/Pw6c2xP/8bR1eB7HiignDebfce59unvO/70OlNLtpfSbNYU0HRNvnqw2d4z+e+tet/axhw7SUHOX/vTAwlG4xBjKirgE8CaK2/pJR6VujeIeCI1vo0gFLq88Bh4MPdHmZZBouL8XRMyzIpVUp88M5HOble3XYOUphjq9XYyiAMhmWZ0gY5QdoqP4yrrfa4LurAHHc9eoa7Hj0zhpLt5ElnzXLwrDnsDHijZubKnL93hjseXuaOh5db11/+xL2x9n15t+AZT9jDFx84yf0n13f9bw3D4JIn7OXpF+2PoWSDMYgRtQCE3yJHKWVrrRsR91aBPb0e5jguy8sbuy7oICwuVnC2atz66ucAcO37/oWjq9Udf3dgvhRbGYTBWFysSBvkBGmr/DDOtvrLH798LM/pxdrqVuyfMSgffeWzI6/H2ffl3YIbf/BpIz8j7jpcWprvem+QJcAKEH6C6RtQUffmgWUywusPX0i5w11ctk1ef/jCdAokCIIgCMLEMIgn6nbgWuBvfE3U3aF79wAXK6X2AWvAC4F3jL2UQxLonoLdeQfmS7z+8IWihxIEQRAEYWQGMaJuAV6ilPoCYADXK6VeAcxprd+nlHoD8Ck8r9ZNWutH4yvu7rnm0AExmgRBEARBGDt9jSitdRN4Xcfle0P3bwVuHXO5BEEQBEEQMk362yIEQRAEQRByiOG60WkAYuQ48O2kP1QQBEEQBGEILgCWom6kYUQJgiAIgiDkHgnnCYIgCIIgDIEYUYIgCIIgCEMgRpQgCIIgCMIQiBElCIIgCIIwBGJECYIgCIIgDIEYUYIgCIIgCEMwyLEvmUcpZQLvBS4DqsCrtNZH0i2VEEYpdRdwxv/1W8DbgA8ALvA14Of87PhCSiilngv8rtb6aqXUU4hoH6XUq4HX
Ag3grVrrj6VW4Cmmo60uxzs14j7/9h9prT8kbZUuSqkCcBNwIVAC3gp8A3mvJopJ8US9HChrrZ8H/DpwY8rlEUIopcoAWuur/f+uB94JvFlrfRjvTMbr0izjtKOUeiPwJ0DZv7SjfZRSB4FfAF4AfB/wO0qpUhrlnWYi2upy4J2h9+tD0laZ4MeBk/47dA3wh8h7NXFMhCcKuAr4JIDW+ktKqWelXB5hO5cBFaXUbXh97gbgCuCz/v1PAC/FO+xaSIf7gR8CbvZ/j2ofB7hda10FqkqpI8AzgK8kXNZpJ6qtlFLqOjxv1C8Bz0HaKm0+DPxt6PcG8l5NHJPiiVqgHSoCcJRSk2IgTgIbwDvwVlmvA/4KMLTWQbr8VWBPSmUTAK31R4B66FJU+3S+Z9JuKRDRVl8GflVr/ULgAeC3kLZKHa31mtZ6VSk1j2dMvRl5ryaOSTGiVoD50O+m1rqRVmGEHXwT+Euttau1/iZwEjgQuj8PLKdSMqEbYX1a0D6d75m0Wza4RWt9Z/Az8EykrTKBUup84DPAzVrrDyLv1cQxKUbU7cDLAJRSVwJ3p1scoYNX4uvUlFLn4q28blNKXe3fvwb4XDpFE7pwV0T7fBk4rJQqK6X2AIfwxLFCunxKKfUc/+cXA3cibZU6SqkDwG3Ar2mtb/Ivy3s1YUxKyOsW4CVKqS/gifWuT7k8wnb+FPiAUurzeLtSXgmcAN6vlCoC97BdOyCkzy/T0T5aa0cp9W68gd8E3qS13kqzkAIAPwv8oVKqBhwFXqO1XpG2Sp0bgL3AW5RSb/Gv/SLwbnmvJgfDdd3+fyUIgiAIgiBsI3FPVLPZdB0nPsPNsgzifH7ekfrpjtRNd6RueiP10x2pm95I/XQnK3VTKFgngKWoe4kbUY7jsry8EdvzFxcrsT4/70j9dEfqpjtSN72R+umO1E1vpH66k5W6WVqa/3a3e5MiLBcEQRAEQUgUMaIEQRAEQRCGYKBwXvicpo7r1wK/iZeJ9Sat9fvHXkJBEARByDmfuOcY7/3cgxxbrXJgvsTrD1/INYcO9P+HQqbp64mKOKcpuF4A3oWXtv5FwGv8M4AEQRAEQfD5xD3HePtt93F0tYoLHF2t8vbb7uMT9xxLu2jCiAziieo8pyngEHBEa30awM8BdBjvvCABeN8XHuSuR1dS+/zzFsq86aUXYxhG69qH73yEj371kdbv/+ny8zj8pH38f//rCA8tb6ZRzF1Tskx+9cVPpmiZvP0f7mOr0ez/jwagYJvUx/Ssflx6cJ6fO3xR1/u/9+kjPHBqg0sOzvPzhy/i418/xse+0XvALVoGv/q9T2GmYPG2277J5gDfZa5o8Zvfpzi2WuW//fMDNJrRO2HiqpsfuewcXvxUb9PLHQ8tc9O/PET6e3F2z7D189SlWf7r1U/mtnsf55a7jw79+SbwyiufyBXnL7aurVUbvPMz9/NLVz+JU+t13vlP91Pv0r6jcvFZs7zhe5488nPe/dkHuOfxtTGUKB4M4Kefcz5XnL/Ib39Kc2ytNtC/+/dHz1Dr2GW21Wjy1k99k0/ce3zs79ZF+yq88cVP4TP3neBv/vWxgf9d2TZ500suZqvR5Pf+8ciOMsfBs87fw89ceQEAj57Z5Pc+fT9Vx6uPbu/VNYfO5gcvPciff/lhLj1nflu/T5q+RpTW+iNKqQsjbg113o9lGSwuVgYu4G6xLDPW5++Gv/2371CwTJ64L/nyHFvZ4o6HlnnLD17CnplC6/rf/dvXuPfYGt91cJ67Hz3DuYszfO+l5/DRf/8OT9g7w8GFco+npk+14XDHQ8tcd+o8ZooWn3/gFJecu8BMwRr52XXHhZDBGRcPn9rg3mNrvOnaSyLvO023NfB94+gqb772Ej71zeOtdoui5jS546Fl7jm5yd5Kgc89cIqnnbNApdi9XtaqDe54aJmfvqrG176zypcePM0VT1zcZnQHxFE3X3vsDPvnSvzwc7wB9Iu3f5uvPnKG
Z6Y4IA7LMPXz2PImX314mf/7ukv51DdPcM+xVQ4dXBjq8//10TN85oFTvPjp57au3X3kBLd+/Rj/8Rnn8tCpDb7Yo31H4dHlTe58eJn/5+WXRj57N2Pyh//tMfbMFDh/bzbG8E7uengZdc4Cz7hoPx//xuNcsK/C0nyp77/rZozUHHfs79Z3zmxyx0Nev/rH+0/y9e+scsm5/fvVRs0bWx9YqXFms84XvnWaZ5y3h6Idn3T6oVMbfOvkBr/8Hw8B8OkHTnH7t061Pjeqbu49uoptmfzkVU/iL77yMNd997nb+n3SjJLiYKjzfqYpxcFatcF/uvwJ/JcXdvc4xMX/+Pfv8LZ/uI+jJ9ZwQy95rdHkaQfmeM+PPJ1X/MWdrGzUOHrCW/n958vP40e+O73OOAgn1mtc88df4uSZzZbh9NZrFE9YnBn52Un1nd//pwf42397rOtnVf2Vl20abNYcTp9eZ3WzzqUH5/mDH3l65L9Z3Wrwve/5AseXN6hXPaP5t69RPHFv93p58NQGP/pnd/DQ46s8enKdSsHij3/0GZF/G0fdvOIv7mSz2mg9d32zxkLJ5o+6fMcsM0z93PyVh3n3P3+Lx46vcnqtxtMOzPPeIb/7T/7lV3noxPq2Mpw47f18/9EVHl3eZK7UvX1H4QP/8hDv+fyDPH5ynVLEhLubuqk3mrzs0Nm8/qrkx8xB+P7//0usrFc5fnIdgFc+93xe9rT+uqZr3/cvHF2t7rh+cL7Ef3/Vc8f6bn3wzkd41z89wNETq6xs1Lhof2Wgd+qR5U3+jz/9CsdPb7BS9Y6efed1T9u2CB83f3z7g/zZvzzEyVPrWKbBCT8acuMPPo3FSiGy7/yXv72b9a06y8sbbNUdDKcZ+7i9tBS9eIXRdufdA1yslNrnp7B/IfDFEZ43UdQaTeqOy1xpdA/JMAQGxmbd2Xa90WxiW55lX7YtNusOW/7fjMObEzczBa/Lbtad1nfLQ7nD2JbRNWwGXhsBLJRtXDyjarPuUC50f10Dj9N6zWG95tXLbA8vFMD+ShGAU+t1Tm3U2FuJb7CMwjaN1ncFaDhuq29OA7Mlbw27VnVYqzWYKw2/pj24UN4xSQfvx9GVLY6uVDk4H4+Xuey/f1sdY81ucV0Xx/X6RVYp2SbVRrO10IkyGqN4/eELKXf8bdk2ef3hC8ddxNDY32Sz3myNmf2YbY0hDdZ9I6rfGDIq+ypFmi4sb9b9z/b6UC8PetE22Wo0abouNcelaKWbZGDXn66UeoVS6jVa6zrwBuBTeMbTTVrrR8ddwLyyEXSGlCb4chcjqu642KbX7DMFk41as1XWQV+2NCnb7e+VWyPKNHCaLt2OXGr4rv9gUt2oO2zWnJ7f0zINyrbJRs1ptWe/AXCuZFGwDE5t1Dm1UWefb1QlhW2are8KvoGf4Ql03Mz57bNW9SatuREmrIPzJY6ubG3rU8H7cWy1ytHVKgcX+oedhiEwDkbVJgYLi2B8yiIl2/KNKMf/fbCyXnPoADe89GIOzpcw8NrrhpdeHMvuvJYRVes/boSpFL3xZt0fQ0q2iR2zgbLPX7id3mgbUQXL6BlCLNkmtUaT2i4N2bgYaOmjtX4QuNL/+YOh67cCt8ZSspyzVvMt+dQ8UW2PTZiG47YmqpmCxenNetsYiXnVMQ4s06Bkm2zW2wN2Lw9NFgnq32lGe16CyWTeN6I8g7HZc3UG3uptvdagYBlYRv/BxTAM9s4UOLVR49RGjfPHEBLdDZ0euUbTnS4jquWJarBWdUb0RJXYrDdZ2Wq0wi/BO/KdlSpHV7a49JzuIYlRGJcnqm1EZbcPlAsmW/Vmy2AMFnWDcM2hA4mkNGgZUQ1voTloGYuWgWUaLW92Eg6AfbNeXz25UeMpzLJea/T93GKnNzDlRXS+Zp8c0fJEFRM/Wcf/3LZLN0x4tT9TDMJ53t/M7GJASJOZgtUyLMq2iZmAGHycBPXfLaS304hqDjQYzpVs1qveKrJS
tAcSEO+fLXJqo8bpjXpK4bxOI2p6hqSgfc9sNdioO63fhyHYEBIO6QWLo2+f2uDMVoNzYto0MjZPlO+VzHJI1wvnOa3vmrYXJIpOyUOlOFgZDcNgrmixUXNYrzUScQDsm/ElBRveLseNmtMKc3ejbJvUnCY1fwdfKeX+kr0eMCG0dClph/NqEZ4oK/BE+eGfnIXFZgpma4DIS5nDBC7y7kaUNzjMl333erXBVqO/tqFSsNioO6zVnIG1DHsrBY6v1VjeTCOc12FETZkmKvA8HfMNn1EmrYP+5pGjKyEjyn/3g7Ho4AC7yIYh8ASP7olqb6jIKiVfj7NbTVSStMN5gSZq8H4VeLOT9kSFw3n9xq6dujTxRE0kLV1KxsJ59bAnqmCx5Xs5IB/hPAg8UU3fiMpfF255orpsew6uB56J5U0vNNwvnDdbslivNnxP1GBtua9S5KHTmzRd0jGinJCwfOrCeV4bBYbPqOE871lbrWud7358mig/nDc2TVR2+8CwmqgkmWlFIfxw3i6Modmi3dJV9vMIjYP5ko1tGpxc94yojVqjrxFVtDxDNivewOz1gAlhvTbYxBcXlUKXcN42YbnVIdDOR3cIh/PyYviFaYfzoiedYDIJJtXA1d1vMKwULF8U2mB2wDDyvkqxtaLbl3Q4zzI7wnlTJixveaK2tv0+DHtnChQtY3s4r8OoORCTJ2pm7Jqo7I5DZd8TFUggdmOgJEUwjq9UGzSa7q48Sp4nyjeiEhhbDcNgX6XQGuPWB1gAlmwTp+m2PK1x5rEahOz21pzT3maejiaqe4qD7cJyl/b20ryExmYK5q53nmSJQTVRC+XtRlQ/I3e2ZLdEoYMOgGHDKXCtJ0W0Jmp6jKiybWIZIU/UCJOWYRhemoOOcF6wSrcMWJqLx4gqtcJ506KJykc473Rr8TV4GWd9I2p9AI/QuNhXKXaE83rPmUGdr/ppGNJug+z1gAlh0G3mcVGyTQyiduc1t2mioB2PzuKqKoqwBy0vZQ4T1H9/YXkwGHrt029FOdsShToDh5HDhlMg8kyKKE2UlXLOlyQxDIO5kt3yHo3iiQJP8xR4tcB795+4dwYDOHu+hBWTgdoWlk/+7rxAWF5t7c7LXn+tFLaPG7tZaHpjSGMgj9C42Ddb2CYsH8QTBbCy5RlRabdB9nrAhBCE89LylBiG0TI2wtQ7PFEAJ9frFC0j04NXmJmCxZafgDKtPFyjEIQrumuivAE6mFQDvUA/g3E2EIVW+28TDgjroFLxRE2xJgo87+HxtTEZUQslvhPyRG3VHRbKNvtni7GJyiGc4mBUTVQehOXbNVFph5KiCBbQwbixmzFythj2ZicTRdlbKXJyPQjn9feAdRpRabdBOrGmKWC95ome41r9DULZ38UWpuE0dxhRpzZquQqLzRQ8j4vrurnRcYUZOMXBjnBe/zxRNcdlZasxsCg0COfZpjHSFvthmPY8UeCF8IIqGPV0g/2zRU5v1HBdF8Mw2Kg3WZor8v2XHODsufi8jONOtmllWRNV8M5zW/dDpVlMr2IYBuWCObCWMkylaLGy1aDa6J+XblzsrxQ4vVn3dE71Zn9heSuc5xmJaYfzxIiKiSQt+W4Eu9gCmq5L02WbsBw8t2+ujKhi28OWy3DegMLysm1hm8Yuwnlef9vaxQAYeKL2VQpjP5i2H7bpCUQDpk1YDtu9T3MjjhczBQvH9Q61LdlGKwXIzx+O9xy6YBIbWVieA01UYDCubjVSn7x7MVOwQuG83WmiglBlkpqouuO2PLL9cisGKQ0CT1Ta7ZDdXpBzdrPNPC4qRWtbnqjOQWrGT8KWP0+UyVaQSC5H5Q4YVBNlmwaVohVaUfbJExXqb4PmJ1ucKWCQfHoD6JInKsNeiDgIjKhin6MuBqG1I9d/57cSSgFiGEZr19oo5EUTBd4Envbk3YuZQnvc2M08tG0MSTUTEqcAACAASURBVGj+CpL8PnR6c6DP7QznlfJ2dp4wGEnu
buhGcMBwQOcgFRhOZ7YauTo6JVhxByHTvDFonijb8ianM1uD6evC/W1QYbllGizOFBLPVg5ddudl2AsRB8HmgVH1UNCeAIPkuRsJJqMtF6ypSbYJcGarnnkjKhg3dpcnqv23SZ22ERyE/sjygEaU1fYGQvrJNiWcFxNJ5dnohZfZOyzc9X4OdFrhY17S9prthvDEkM9wXu+M5U5oMgl/177JNovDtefVF+/nov2zA//9uLAtEZYHxlMcRtRus1WPwrR4ooLEomc2G7s6Ny9pwu2+u3CeHfo5me93lq/Xe/BUYEQNluJgZcsLV4qwfEJZrzmxnVU1KF4oqN76vTOZXThRZd7CeQG5DOcNqImyTXObMdQ32ea2AXDwV/uGlzx14L8dJ7Zp4Li0hNDTaETNjtGImgmF8+pOE6fpJuiJMseYJyq7Hp6wJyrt8b0Xw46RaYTzgnq87/jajjJE0TKiqg0MvFB4mmS3t+ac3SQ8jItyR4qDHZqo0IuW5VVVJ9tXWfkpd0A/TVTdaa/IA8OpMEAKitkUBsBR6PTIhXeOTgtBgs1REm0GBJPlRt1p5alLKqN/2bamI09UIT+aqKif+xGWASS1MapStNg7U+C+4+v+5w64O2+rQdE2E98Q00l2e0HOWa82Ug+RzXSkOAgGqUKHJgoY+KTvLLA9nJefcgcU+uWJ8j1UBctoGbqDrCbzZkQVOozJRtOlkGEvRByMM5wXGEwbtdBRTglN9GV/s8cotManDOviAsOp0XSzbUT5fcFgd7vXZgvtfpjk/HXunnJLw9Xvc8sZE/enX4IJZaOelRQHEcJyf5AqWOYOkXke2I1OKItYu9md53/XQbRf4f6WlCh0FKwOgf00hvPaRtT4PFGbdacVWktUWD6yJirQAmZ3WgqLmLMwgXcjWHzNFKxdeWq2e6KSG1vP29MOjfZL9VEM5SXLQhukX4IJpNZoUnfc1L0BMwWLrXoT1w0mqZ2DVDDI5sqIyqmWK6Bvsk2nrYkKjKdBPFEzOfNEtcN5Tf//07c7b26Mu/PCnqhAXJ5cOG+MmqgMG9LhI0ayvKllprX42t0Un4YmCjxPVFQZoggbTmJETShpn5sXEBwwHCRPixqkwiuWvLBNy5WjcgcMLCy3jF0NhrZptAb5tPveIIS1Ya7r4kyjJ8pfdY+aaBO2e6Ja4byEwt2eJ2oKNFEZm8C7MeziOPBml2wzUYF/YEQVBsiXFvYGFjMQ/k+/BBPIen2w2G7czISEpuCdmwfbB6nykCuWNBl2+25WCOq/3lUT1W6n3Rq5laKFZWR7gA8I10PnztFpIfBADZrXqxfBO7xRSyGcNwZPVD1HGcs7f84awxpRRcvACskIkiIwogb5XNs0CLpIFsa59EswgaxXfU9UwmeRdRJMwMGqNMjJEx6kKrsIF2WFbZqoHJU7IFjhdddE7cwTNahBPlu0qBTt1HesDEI4rJkHL0QcHFwo8eSzKlxycH7kZ5mGZ3Rv1EPhvEQ1UdOQbDMvmqjhjCjDMPwxJNlxNdBEDepBD9ohC4Zs9tWnOaQVzkt5gm/ljam3NScwCeG8vCfbHEwTZYWMqEFTUMwW7a4erqwRDmvm4dy0OJgpWPz1Tz1rrM/bHs7LjyfKyYE3spyTtDDtcX33dTlbtBKXAxycL2EagzseSra3WEg70SaIJyoW1n0jKivhvOAsraiQSTucl90BoZOiZRDYgXky/gLax75010RZpoFhGLseDCsprCKHpeWRc9xceCHyQKVo+eG8pDVRpudR7NKnByEP3kjbbI89k+iJAq8PJW1E2ZbJgfnSwJGFwHhK+8gXEE9ULGwmvDOmG8EBw61wXsQgFUy4ecoT5RkX3lldaWerHYa+nqiQwHqmuLtw3rOeuNg6UyrrSDhv/FQKFpv1ZjvZZmKeKO9zthpN5oYU++bBG2kYBiXbO04r00aUP14MMwc994K9qWxMufopZw08zpVaRlT6bSBGVAzU/NVY2vHaHeG8iEGq
PMKKJU2C8uZB+9NJf01UyIiyd+cpfPXzLhhDCZMh2ohKf1DMM5WixUbdYbPR3HWixVEIwlxbdWfodA2NZhPT8LRdWaZkW9k3okYI5/3Xq5887uIMxBu+Z/DPDeo+C+E8MaJiIEgpkPb2y2ACDjxRTkTIJMhonKdwHuRzV15AYMN210S1jz9pu+Xz+327IZqo8TNTsDiz1WCr7uw60eIohD1Rw5KXZKvlHIyZecz/txsCIyptRwWIJioWaoERlbYnqrjdiIpa7bfCeTl72WYK+dH+dGIY3jl43Y99cVveqpZbPmftMwitPFGOhPPGRaVosVnzzs5LUk7Q9kSNakRlf0rKUiipG9NiRKXtqADxRMVC4IlK+yXbkeKgR56ovHk68j442KbRM9lm2xOVv92TgxI+gFiE5eNhpuCH8+pOou902xM1fJqDhpOPjPViRKVPYDxloQ3EiIqBqhMcIJu2EeUPbD00UU9dmuPCfTPMp5zTarccOji/7VzAvGFbRmtLdydhI+rgfJkD8yWectZsksVLhChNlJUDT0SWqfgpDs5sNRJ9p8fnicqDEWX5/89uX12cKfDEvTNcvDR54wZky5DN18yZE2qNJpZppD4gBNtxq43AE7Vztf+ip+znRU/Zn0r5RuGXdyFCzCK2afbMExW00XzZ5mOveW6SRUuMbUaUaKLGwoyf4uCR5U0OHRg9geegBB7tkTxRzWbqY+YgBAZjljVRJdvkI698dtrFiI1cGVFKKRN4L3AZUAVepbU+Err/BuBngOP+pddqrXUMZc0NNadJKQOx2mA7biD2FN1JduitiWpOhTHR1kQ1pW+OiUrBotF0eezMFi9VS4l9biDwnQ5PVHYm8GklS20wiCfq5UBZa/08pdSVwI3AdaH7lwM/qbW+M44C5pFqo5m6qDygZFvtA4hlG3lm6K+Jmvw2Ek3U+AnE5E0Xzt87k9jntsJ5I2uist/vyxnaGTatZCmkOogRdRXwSQCt9ZeUUp1nFFwB/IZS6iDwca317/R6mGUZLC5WhirsIFiWGevzByuESblgpV8OvEG1aXp1bvsndO/fV6EyhlPjJ40k+06xYGFYXfqIaVAqZqP/BMRRN2u+I65YLlCeKQKwd89Mpr73oGRi3AHO2tM2nA49YW9iZWr4k5pRsHd85qB1Y1gmRTtb/T6K+YrXV5f2zbK4UB75eVnpO1mkW90szHptsLiQ/ngxyEy6AJwJ/e4opWytdZAW+a+B9wArwC1KqR/QWn+s28Mcx2V5eWPoAvdjcbES6/MHYW2jRsEk9XIAFEyD1fU6y8sbrK3XAFhf3aKWgxVf0iTZd0xcNqv1yM/bqjYwmvG+J7sljrrZWKsCsLJWpeR6FtXmRi1T33tQsjDuALj1drb6RdtIrEzVmve5p1e2dnzmoHWzWW1gkK1+H4Xhe/SrG1WWu3iTd0NW+k4W6VY3rr95y6k1Eqm7paXu+sJBZtIVIPwEMzCglFIG8Pta6xNa6xrwceCZI5R1Iqg2mpk40wc8d2cvYbmQDrZp9skTNfltJHmixk+QO222aLGvUkjsc0sdiX2HIXfC8oyM8dNIOUOaqEFKcDvwMgBfE3V36N4C8DWl1JxvUH0vMPXaqJqTHU1UuUMTFRxsK6SLp4nqn+JgktmWsVyMqLEQGFFPWJxJ9D23TYOCZYwmLHfyoQVcnCkwUzApTMFCJ6u08kRlIKIySDjvFuAlSqkvAAZwvVLqFcCc1vp9SqkbgM/g7dz7tNb67+Mrbj6oNZqUMvKClQpm24hypmNyzgO21UNYnpPJZFSC7+iIsHxsBCcPnL84ulZnt3gLtlE8UfnwwP7od5/L4Sfvl8VoiuRqd57Wugm8ruPyvaH7NwM3j7lcuabacKkU029c8Nyea1ueXqHRdFNPACp4FPp4oqZhlRt8x3CeKOmfoxEk2E1yZ15AuWCOnOIgDzve5ko2T8lZcuJJI0tGVPolmEBqTjMTZ/pAoIlqh/OmYXLOA5bV
SxOVD23IqLTCeaKJGhv7Z4vMFi2ece5C4p9dts0Rk23mwxMlpM8T985Qtk2W5oppF0UylsdBrdHMhIUMO4Xl0xAmygO2abDZwxNlTYExYRgGltGhiZJJdCTmSjaf/rnnp9J/ygVrRE2UjE/CYFx23h7++RdekImQqvTYGKhmSFi+LWN5Tg74nAZ6CstzknRwHNiWue3sPPFEjU5aBnjZtkb3REn7CwOSBQMKxIiKhWojS+G89u68ugxSmcE2DepOr4zl09FOXj24NJxAWJ6N90bYPePQRE1LvxcmBxmxYiB74by2J0qEu9mg5wHEUzSZBB458UTln3LI6z0MDWc6zowUJguZUWMga8LyYJKaFsFyHvBSHEy3sBy80FOj2WyJ7GUSzS+eJkrCecJ0kY2ZfoJwXTdTBxAHW4arDcff/ZKNck07tmm0QlidTEueKAjqwTPwTQPMjOgchN1Tts0RM5ZPT78XJgfpsWOm7q+osxTOA0+nJVuIs0PfjOVT0k5hYbl4IfJNudDWXw6D9AEhj2Rjpp8gar53ISvhvOB8p8CIKsgglQls08CJMKJcd7oMirAmSrwQ+WZ0TdT0LB6EyUFGrTETrMQy54mqN3GcpoTzMkLggekkyL85dUaUTKC5p+wfMdV0oz2s/ZgmLaAwOciMOmZanqisGVFBOE8GqUzQLZzX3uo/He0UaMOkb+afsNd7GKQPCHkkGzP9BNHyRGXE41MqeOXYEmF5pugmLG9n7p6OdmprosQLkXfKwVgzhLi86bo0XckTJuQP6bFjptbItidKNFHZwDYNHNfTQIVpbfWfknbaromaju88qQSeqGF0UZLiQsgr2ZjpJ4jshfNCwnLRnWSGoB06Q3qN5hSG81qaqGy8M8JwtD1RQxhRkmxVyCkyao2ZzIXztnmi5IDPrBC0w04jaromk3aeqOk4dHmSKRcCT9Tuw3nB4kH6gJA3ZEYdM1nzRJU7w3niicoEgZEUhDEC2pqo6WgnL3O7CMsngWCsGc0TlY1xUxAGRXrsmKll1hMlwvIs0TKimtsnnLYmajraKThDUITl+SfwRA2TtVw0UUJemY6ROkGqGRWWbwWaKJmoMkF3TdQUhvMCTdSUGI6TSjk01uyWaev3wuQgo9aYaRtR2RgMOjOWy0ovG7Q9USIsd4LdedI3c01LEzWMJ0qMKCGniBE1ZgJNVFbCeQXLwMD3RDWbFGS1nwlawnLRREmyzQlhNE/UdC0ehMlBZtQxk7VwnmEYlGyTal08UVmiqydK8kQJOWWUZJttTVQ2xk1BGBTpsWOmlWwzQ4NByTY9YbnoTjJDWxPVISyfsl1KLWG5I8LyvDPKsS8SzhPyynSM1AnSCudlxBMFXlk2G01ckBQHGaF7niiv/0xLOxWstieqkKGFh7B7CpaBaYymiZqWfi9MDjJqjZlqw6VoGRhGdgaDcsFivdoAkIkqI7Q8Ud00UVOyIrdCyTan5TtPKoZhULYt0UQJU4XMqGOm5jQzo4cKKNkm6zVvdSiaqGzQXxOVrT4UF144T/R6k0K5YA6XbHPK+r0wOUiPHTO1RjNTeijwjKg13xMlK71s0DXZpm9UWVNiUNhBOE80URNBuWANl2xzyjywwuSQrdl+Aqg6zUzpoaDTE5Wtsk0rwWRRn/Jwnm0aNF2oyaaHiaBsm0OF8+qSsVzIKTJqjZmseqICI6owJZNz1gmM2c5wXt2ZLm1I8D236s7UfOdJplywhhKWO6KJEnKK3e8PlFIm8F7gMqAKvEprfSR0/1rgN4EGcJPW+v0xlTUX1BpZ1ERZ7XBexgy8aaV7xvLp80SBl6BRvBD5Z1hP1LSl9hAmh75GFPByoKy1fp5S6krgRuA6AKVUAXgX8GxgHbhdKXWr1vpoXAUehAdPbnB6s57KZ59Yr2UunFe2zVbulmmZnLNO0A73H19n70yhdf3BUxve/SkxdsPfU/pm/ikXTB47s8Vdj5xpXZtb3mJtrdrz391/0u/30geE
nDGIEXUV8EkArfWXlFLPCt07BBzRWp8GUEp9HjgMfHjcBR2U5Y0aP/aBO3D7/2lsvOCifSl++k4WQ5P0QuhnIT3mS96r974vfpv3ffHb2+5ZpsFMYTqMqIVSewiaKw0yHAlZZu9MgS986zSv+dC/7frfGsBsyRp/oQQhRgYZtRaAM6HfHaWUrbVuRNxbBfb0ephlGSwuVnZd0EGxLJNbf+4FnFyvxfYZ/VAH5licK6X2+Z382ssOcc1l51KyTa64YB+um6aJmV0sy4y1b4ZZXKzwd69/PssbOz2mS3NFzjt7PpFyDEpcdfNjV17AU8/bg9N0+e7zF1uH2OaNJPtOlvl/X/50fuw5K9uumaZBs9l/zNk3W+RJB7PV75NA+k538lA3gxhRK0C4Z5u+ARV1bx5Y7vUwx3FZXt7YVSF3w+JihQNliwPlmdg+oy8NJ9bvOAyH9nn14brx1n+eWVysJFo3587YnDsT/QpmrY3irJuLF8sAbK1X2YrlE+In6b6TZZ62f/vYu5u6mcY6lL7TnazUzdJSd+N+kJjB7cDLAHxN1N2he/cAFyul9imlisALgS8OX1RBEARBEIR8YPQL7YR25z0DL2x9PXA5MKe1fl9od56JtzvvPX0+8zjw7T5/IwiCIAiCkAUuAJaibvQ1ogRBEARBEISdTMcWIEEQBEEQhDEjRpQgCIIgCMIQiBElCIIgCIIwBGJECYIgCIIgDIEYUYIgCIIgCEMgRpQgCIIgCMIQTMxhVaF8VpcBVeBVWusj6ZYqXZRSd9E+ludbwNuADwAu8DXg57TWuz9yPecopZ4L/K7W+mql1FOIqBOl1KuB1wIN4K1a64+lVuAE6aiby4Fbgfv823+ktf7QNNaNf9j6TcCFQAl4K/ANpO90q5tHkL4DgFLKAt4PKMDBy7VoIH2nW93sIUd9Z5I8US8Hylrr5wG/DtyYcnlSRSlVBtBaX+3/dz3wTuDNWuvDeC/xdWmWMQ2UUm8E/gQo+5d21IlS6iDwC8ALgO8DfkcplZ3DEGMiom4uB94Z6kMfmta6AX4cOOn3k2uAP0T6TkBU3UjfaXMtgNb6BXiJqd+J9J2AqLrJVd+ZGE8UcBXwSQCt9ZeUUs9KuTxpcxlQUUrdhtfONwBXAJ/1738CeClwSzrFS437gR8CbvZ/j6oTB7hda10FqkqpI3gZ+7+ScFmTJqpulFLqOrxV4S8Bz2E66+bDwN+Gfm8gfSegW91I3wG01v9DKRV4TS4AjgHfj/SdbnWTq74zSZ6oBdqhKwBHKTVJRuJu2QDegWe1vw74K8DQWgcp6lfx3KZThdb6I0A9dCmqTjr70lTUVUTdfBn4Va31C4EHgN9ieutmTWu9qpSaxzMY3oz0HaBr3UjfCaG1biil/hz4A7w6kr7jE1E3ueo7k2RErQDho5ZNrXUjrcJkgG8Cf6m1drXW3wROAgdC9+eB5VRKli3CmrCgTjr70rTW1S1a6zuDn4FnMsV1o5Q6H/gMcLPW+oNI32kRUTfSdzrQWv8U8FQ8DdBM6NZU9x3YUTe35anvTJIRdTvwMgCl1JXA3ekWJ3Veia8LU0qdi2fJ36aUutq/fw3wuXSKlinuiqiTLwOHlVJlpdQe4BCe+HPa+JRS6jn+zy8G7mRK60YpdQC4Dfg1rfVN/mXpO3StG+k7Pkqpn1BK/Yb/6wae8X2H9J2udfPRPPWdSQp33QK8RCn1BTyh3vUplydt/hT4gFLq83g7QF4JnADer5QqAvewXccwrfwyHXWitXaUUu/GG9hM4E1a6600C5kSPwv8oVKqBhwFXqO1XpnSurkB2Au8RSn1Fv/aLwLvlr4TWTdvAH5f+g4AHwX+TCn1z0ABT+NzDzLuQHTdPEyOxh3Ddd3+fyUIgiAIgiBsY5LCeYIgCIIgCImReDiv2Wy6jiPerwDLMpD62InUS3ekbqKReumO1E00Ui/dkbppUyhYJ4ClqHsDGVHhLMYd16/FS5DVAG7SWr+/37Mc
x2V5eWOQj50KFhcrUh8RSL10R+omGqmX7kjdRCP10h2pmzZLS/Pf7navrxHlZzH+CWC943oBeBfwbP/e7UqpW7XWR0crriDAsZUtTq9kQjeYOTYwWJG62YHUS3ekbqJJul4qRYuFciGxzxPiZxBPVGcW44BDwBGt9WkAfxfYYbzstYIwFE7T5a23fZOPff1Y2kURBEEYK5YB/0Et8YornsDTDs73/wdC5ulrRGmtP6KUujDi1lAZRC3LYHGxMnABJx3LMqU+fJymy2/ccjcf+/oxfvp5F/LUA3NpFymTmKZBsylahU6kXrojdRNN0vVy5PE1/ubOR/jUvcd51gV7+ZkXXMj3qLOxTCOxMgyKzE2DMYqwfKgMoqKJ2o7EnT2cpstv3/ZNPv71Y7z2+RfwK9d8l9RLF6TPRCP10h2pm2iSrpeXPHkfP3H5ufzPrx3lr7/6KD/7wbs4f7HM/3X5E7j20gPMFKzEytIP6TNtlpa6ew1HMaLuAS5WSu2D/93enYfJVdaJHv9WVXdnIUsn0KwJCesrbkgSVgERRS6MDDxer8OAOgOyzaYjXkdl1HnuPM44zrCN1+Ve0FxnEESRiYoLoGzKJhJAQPGFsJqEIAnpJJCkl6q6f1RVUjTd6arqrj7Vdb6f5/F5uuotTv88vzr0j/f9nffwMnAspWe1SXWpLOH96DcvcN5RCzjnyAVJhyRJTTFjSgdnLJ7H+w7Zi9ufWMvVy1fyb7eu4P/e/QzvefMevO+QPemZMSXpMFWjuouoEMIZwIwY4xUhhAuBmyjtN7U0xrhqvANUeysUi/xTuQfqvCMXcK4FlKQU6MhmeGfo4R0H7sLDqzdy9fJV/Md9v+eb96/kxNeV+qYO3NWWhlY34TuWDwzki04RbpfmKdNCscjnbnqcGyoF1FHbC6g0n5fReG6G53kZmedmeK12Xlb2buHaB1bxg0fXsGWgwKF7d3Pm4nkcuc8cspmJ7ZtqtXOTpJ6emcuBJcONtdOz8zSJVBdQ5x6596sKKElKo3nd0/ifx+/PeUct4HsPr+HbD67ib5c9yj5zp/Oni/fipIN2ZWoL9U3Jx74oAZUlvBt+8wLnHLE35x21MOmQJKllzJrayQcPm8/3zzmMfzw50NWR5Z9/+gR/fOV9XHH3M7y0uT/pEFXmTJQmVKFY5J9vfoIfPPoCHzpib85zBkqShtWRy3LSQbvx3163Kw+s3MDV96/kynue4z/u+z0nHbQbZyzZi3133inpMFPNIkoTplAs8s8/fYLvP7qGs4/Ym/OPWkBmgtf5JWmyyWQyLJ7fzeL53Tzz0maufWAVP/zNC3z/0TUcuXAOZy6ex2ELuv33aQJcztOEKBSLfP6nT/D9R0oF1AUWUJJUt4Vzp/PJdx7AD889nAveuoD4h5f56+sf4Yz/fIAbHl1D/2Ah6RBTxSJKTVcpoL73yBrOPny+BZQkjVH39E4+dMQCbjj3cD5z4oEUKfKPNz3OH3/tPpbe+xy9WwaSDjEVXM5TUxWKRf7lZ6UC6qzD53PBWxdaQEnSOOnqyPLHb9ydU96wG/c928s3l6/kq3c9w9JfPsepb9ydj7xtX7o6nC9pFosoNU2hWOQLP1vBsofX8OeHzecvLKAkqSkymQyHL5zD4QvnsGLtK1x9/0q+89BqFsydzvsO2TPp8NqW5amaolAs8q+3rOC/Hn6ePztsPn95tAWUJE2E/XfZic+eeCBv3GMm33pgJXkfPt00FlEad5UC6vpfP88HD53PX1lASdKEymQynLl4Hit7t/LzJ9clHU7bsojSuCoOKaD++hgLKElKwnEH7MKes6Zw9f0rkw6lbVlEadwUi0W+sK2AmmcBJUkJ6shmOH3xPH69eiOPPr8x6XDakkWUxkX1DNQHlszjr4/ZxwJKkhL2x2/cjRlTclx9/6qkQ2lLFlEas2KxyL/d+iTf/fXzvH/JPP7mWAsoSWoFO3V18J4378GtT7zI6g1bkw6n7VhEaUyKxSIX
3/ok1z20mjMXz+PDFlCS1FLed8heZDIZrn3A2ajxZhGlhlUKqO+UC6iPvM0CSpJazW4zp3BC6OH7j6xh09bBpMNpKxZRakixWOSS20oF1BmL97KAkqQW9v7F89g8kOd7jzyfdChtxSJKdasUUN9+sFRA/e3b9rWAkqQWFnabwZL5s7n2gVUM5n1I8XixiFJdisUil97+FN9+cDV/usgCSpImizOXzOMPL/fz08dfTDqUtmERpZoVi0Uuu/0prn1gFacv2ouPHmcBJUmTxVH7zGXh3Glcff8qikUfBTMeLKJUk2KxyOV3PMW3ygXUhRZQkjSpZDMZzlg8j/iHl3lg5Yakw2kLFlEaVaWAumb5Kv7kkD0toCRpkjr59bsxZ1on3/RRMOPCIkqjWvrL57YVUB97+34WUJI0SU3pyPI/3rIndz71Es+s25x0OJOeRZR26JmXNvO1e57jXaHHAkqS2sB737IHXbkM1zzgbNRYWURpRMVikUtufZIpHVkutICSpLYwZ3oXJ79+N3782z+wfnN/0uFMahZRGtFtK9Zx77PrueCtC9l5p66kw5EkjZMzF8+jb7DAdx9y882xsIjSsLYM5Lnstic5oGcn3vuWPZMOR5I0jhbuPJ2j953LdQ+tZutAPulwJi2LKA3r//3yOdZs6uPjx+9PR9ZlPElqN2cunsf6LQP85LE/JB3KpGURpdd4bv0Wvnn/Sk46aFcOmTc76XAkSU2weP5swq4zuGb5SgpuvtkQiyi9SrFY5OJbV9CVy/LhY/dJOhxJUpNkMhnOXLIXz7y0hbuffinpcCaljtE+EELIAl8BDgb6gHNijCuqxi8EPgRUHsZzfowxNiFWTYA7VqzjnmfW89Hj9mWXGVOSDkeS1EQnHNjDNaybawAAIABJREFUl37+NFcvX8XR++6cdDiTzqhFFHAaMDXGeGQI4QjgEuDUqvFFwAdjjMubEaAmztaBPJfe/iT77TKd99lMLkltryOX5fRFe/HFnz9N/MPLhF1nJB3SpFJLEXU0cCNAjPHeEMKSIeOLgU+FEHYHfhRj/PyODpbLZejunt5QsO0ol8u2zPm4/JYneH5jH1d/6DB22TnZC6mVzkur8dwMz/MyMs/N8DwvJX929L58/d7nuO7hNVz83jcDnpta1VJEzQKqn1SYDyF0xBgHy6+vBb4MbASWhRDeHWP84UgHy+eL9Pa61XxFd/f0ljgfK3u3cOUvnuLE1/VwYPfUxGNqlfPSijw3w/O8jMxzMzzPy3anvHF3rntoNecePp/dZk7x3FTp6Zk54lgtjeUbgeojZCsFVAghA1weY1wbY+wHfgQcMoZYlZBLbnuSjmyWj7xt36RDkSRNsNMX7UmxWOQ7D65KOpRJpZYi6i7gZIByT9QjVWOzgEdDCDPKBdXxgL1Rk8zPn1zHnU+9xLlHLaDHZnJJSp29Zk/j+AN24b8efp5X+gdH/wcE1FZELQO2hhDuBi4DPhpCOCOEcF6McQNwEXAb8AvgNzHGHzcvXI23rQN5Lrl1BfvsPJ3TD7GZXJLS6swl83i5L88PHn0h6VAmjVF7omKMBeCCIW//rmr8KuCqcY5LE+Q/f/V7Vm/s46v/48105Nw2TJLS6o17zOLgPWdx7fKVnPu2/ZIOZ1Lwr2aKrezdwn/c93veFXpYsnd30uFIkhJ25pJ5rN7Yx099FExNLKJS7FKbySVJVY7db2fmdU/l63c9TdFHwYzKIiql7nxqHb946iXOOXJvdp1pM7kkCXLZDH+6aB6/XrmBh1dvTDqclmcRlUJ9gwUuvvVJFs6dxumL9ko6HElSCznljbsxe1onVy93u4PRWESl0FW/+j2rNmzl48fvT6fN5JKkKtM6c/zpofO5/Ym1rOzdknQ4Lc2/oCmzesNWvnHf73nngbtw2II5SYcjSWpB7z98b3LZDN9yNmqHLKJS5rLbnyQDNpNLkka026ypnHjQrvzg0TVs2DKQdDgtyyIqRe56+iVuX7GO
c45cwO6zpiYdjiSphZ25eC+2Dhb4r4efTzqUlmURlRL9gwUuuXUFC+ZM44zFNpNLknbsgJ4ZHL6gm+88uJqBfCHpcFqSRVRKfPP+lfy+12ZySVLtzlwyj7Wv9HPT79x8czj+NU2B5zduZekvn+P4A3bh8IU2k0uSanPEgjnst8t0rr5/lZtvDsMiKgUuu/0pMsBHj7OZXJJUu0wmwxmL57Fi7Svc91xv0uG0HIuoNnfvMy9x2xNrOfuIvW0mlyTV7b+9blfmTu/k6vtXJh1Ky7GIamP9gwX+7dYn2XvONM5cPC/pcCRJk1BXR5b3HbIn9zyznifXvpJ0OC3FIqqNXbN8Jc+t38LH3r4fXR2mWpLUmP9+8J5M6chyzXJno6r5l7VNrdm4la/f+xzH7b8zR+0zN+lwJEmTWPe0Tt79ht34yWN/YO0r/UmH0zIsotrU5Xc8RRG48O37JR2KJKkNnLF4HoP5Itc9tDrpUFqGRVQb+uWz67nl8bWcdfh89rCZXJI0DvaeM41j99uZ6x9azdaBfNLhtASLqDYzkC/wb7esYF73VN6/ZH7S4UiS2siZS+axYesgP/zNC0mH0hIsotrMt5av4tn1W/ifb9+fKTaTS5LG0Vv2msXrd5/Jtx5YRcHNNy2i2skLm/r42r3Pcux+O/PWfW0mlySNr0wmw5mL9+K59Vv4xZPrkg4ncRZRbeTf73iKQhEufLs7k0uSmuP4A3vYfeYUrl6+KulQEmcR1Sbuf66Xn8YX+bPD5rPX7GlJhyNJalMd2QynL9qLB1du4LdrNiUdTqIsotrAYL7Av966gr1mT+WDh9pMLklqrlPftDs7deVS/ygYi6g2cO2Dq3l63WY+9vb9bCaXJDXdjCkdnPamPbjl8RdZs3Fr0uEkxr+4k9yLL/dx5d3PcvS+czlmv52TDkeSlBKnL9oTgG89kN7eKIuoSe7f73iKwUKBj7kzuSRpAu0+ayrvDD18/5E1vNw3mHQ4ibCImsSW/76Xm373Ih88dD7zum0mlyRNrDOXzOOV/jzfe2RN0qEkwiJqkhrMF/jXW1aw56wp/NlhNpNLkibeQbvNZNG82Vz7wCoG84Wkw5lwHaN9IISQBb4CHAz0AefEGFdUjZ8CfBYYBJbGGK9sUqyq8p2HVvPUus1cfOobmNqZSzocSVJKnblkHh/73m+45fG1nHjQrkmHM6FqmYk6DZgaYzwS+CRwSWUghNAJXAa8C3gbcF4IYfdmBKrt1r7cxxV3P8tb95nLsfu5M7kkKTlH7zuXvedM4+rlKymm7FEwo85EAUcDNwLEGO8NISypGjsIWBFjXA8QQrgTOAa4brwDrdXWgTz/9NMneOmV/qRCqEtHZ47BOp+G/cKmPvrzpWbyTCbTpMgkSRpdNpPhjMV78S8/W8H533mYzuzE/F3KZODsI/Zm0bzuCfl9w6mliJoFbKh6nQ8hdMQYB4cZ2wTM3tHBcrkM3d3T6w60Vlv68+TJMFnuE8gPFKi3bt955hT+8u3786Z92ndLg1wu29TvyWTmuRme52VknpvheV5GVu+5OfOofVi+ahPrXumbsL+/WTJMnTYl0RzWUkRtBGZWvc6WC6jhxmYCvTs6WD5fpLd3c11B1uufTw5NPf546u6e3vD5aPZ5TNJYzku789wMz/MyMs/N8DwvI2vk3CT1t7fZOezpmTniWC09UXcBJwOEEI4AHqkaeww4IIQwN4TQBRwL3NN4qJIkSZNDLTNRy4ATQgh3AxngrBDCGcCMGOMVIYQLgZsoFWRLY4zp3bpUkiSlRiaBTvoXgWcn+pdKkiQ1YAHQM9xAEkWUJEnSpOeO5ZIkSQ2wiJIkSWqARZQkSVIDLKIkSZIaYBElSZLUAIsoSZKkBtSy2abGKITQCSwFFgJTgM/FGH9QNX4h8CFKe2gBnB9jjBMdZ1JCCA+y/RmMT8cYz6oaOwX4LDBIaTPXKxMIccKFEP4c+PPyy6nAW4DdY4y95fFUfmdCCIcD
X4gxHhdC2B/4BlAEHgX+KsZYqPpsFvgKcDDQB5wTY1wx8VE335Dz8hbgfwN5Sv+/PxhjfGHI50e85trJkPOyCLgBeKI8/NUY47erPpua7wu85txcC+xeHloI3BtjPH3I51PxnamXRdTEeD+wLsb4gRDCzsCDwA+qxhdR+hfd8kSiS1AIYSpAjPG4YcY6gcuAQ4FXgLtCCDfEGNdMaJAJiDF+g1KBQAjhy5QKyOrnUqbuOxNC+DvgA5S+CwCXAp+OMd4eQvg/wKmUnrBQcRowNcZ4ZPmRVZeUP9NWhjkv/w78TYzxoRDC+cAngAurPj/iNddOhjkvi4BLY4yXjPCPpOL7Aq89N5WCKYQwB7gN+OiQz6fiO9MIl/MmxnXAZ6peD33I9WLgUyGEO0MIn5q4sFrCwcD0EMLNIYRby//yqjgIWBFjXB9j7AfuBI5JJMqEhBCWAG+IMV4xZCiN35kngfdUvV4M3FH++SfAO4d8/mjgRoAY473AkmYHmJCh5+X0GOND5Z87gK1DPr+ja66dDPd9+aMQws9DCF8PIQx9qmxavi/w2nNT8b+A/x1jfH7I+2n5ztTNImoCxBhfjjFuKl+03wU+PeQj1wIXAMcDR4cQ3j3RMSZoM3AxcCKlc3B1CKEyQzqL7dPHAJuA2RMbXuIuovQvtqFS952JMV4PDFS9lYkxVh65MNx3Y+j3J1/13WobQ89L5Q9gCOEo4K8pzeZW29E11zaG+b7cB3w8xngs8BTwD0P+kVR8X2DYc0MIYVfgHZRnwIdIxXemERZREySEMJ/SNOlVMcZrqt7PAJfHGNeWZ1t+BBySUJhJeBz4ZoyxGGN8HFgH7FEe2whU/9fiTKCXlAghdAOvizHeNuT9tH9nKgpVPw/33Rj6/cnGGIfOArelEMKfAP8H+KMY44tDhnd0zbWzZVXL38t47TWT2u9L2XuBa2KM+WHG0vqdGZVF1AQIIewG3Ax8Isa4dMjwLODREMKM8h/H44HU9LkAZ1PqPSCEsCel81GZSn4MOCCEMDeE0AUcC9yTSJTJOBb42TDvp/07U/FgCOG48s8nAb8YMn4XcDJAefnhkYkLLTkhhPdTmoE6Lsb41DAf2dE1185uCiEcVv75Hbz2mknl96XKOyktiw8nrd+ZUTkdNzEuAuYAnwkhVHqjrgR2ijFeEUK4iNIsVR9wS4zxxwnFmYSvA98IIdxJ6S6rs4H3hRBmlM/NhcBNlAr+pTHGVQnGOtECpWWH0osQzgBm+J3Z5mPAleUC+zFKS+WEEP6T0pL5MuCEEMLdQAZo+7uJQgg54IvAc8B/hRAA7ogx/kPVeXnNNZeSGZe/AL4UQugH1gDnQbq/L0O86t838Kpzk9bvzKgyxWJx9E9JkiTpVSZ8JqpQKBTz+eYUbrlchmYdW2Nnflqb+Wlt5qe1mZ/WNpb8dHbm1gI9w41NeBGVzxfp7d3clGN3d09v2rE1duantZmf1mZ+Wpv5aW1jyU9Pz8xnRxqrqbE8hHB4COH2Yd4/JYTwqxDCPSGEcxuKTpIkaRIadSZqmF1fK++nbjfp3i0DXHn3s/QNFkb/sF6ja0oH/X32IrYq89PazE9rMz+jO3KfObzjwGFXxSatWpbzKjubXjXk/W27SQOUu/aPobQ794hyuQzd3dMbCHV0uVy2accGuGflGr7z0Gp23qmLjlymab+nXWXIUMSegVZlflqb+Wlt5mfH1m8e4Il1m/nvhy1I5Pc3qz4YtYiKMV4fQlg4zFBDu0lP5p6o9Ru3AHDFnxzM3nOmNe33tCt7Blqb+Wlt5qe1mZ8d+8QPfsvT6zYndo7G2BM14thYNttM3W7SA/nSMl6Xs1CSJNWsM5ehP99+rTBjuTtv227SwMuUdle+eFyialH95dsjO3Nu9C5JUq26ctltExHtpO4iasiuyanaTbryBZjSYRElSVKtujqy2yYi2klNRVSM8RngiPLP11S9fwNwQ1Mia0H95bvynImSJKl27ToTZTVQ
h4Fty3n2REmSVKvOXLYte6IsourQny/Qkc2QzVhESZJUq65choF8kXZ7Xq9FVB368wW6XMqTJKkuXeVe4oE264uyIqjDQL7oUp4kSXWq9BK325KeRVQd+vOFbdW0JEmqTWV/xXZrLrciqMNAvuCdeZIk1Wn7TJTLeanVP1h0t3JJkupU6Sd2JirFnImSJKl+lX5ie6JSzLvzJEmq37aZqEGX81KrVES5nCdJUj06yzdl9TkTlV79g0WX8yRJqpN354kBtziQJKluXe4TpX4byyVJqtu2LQ7siUqvAXuiJEmqm1sciP68PVGSJNXLLQ7EQL7AFHuiJEmqy5QOZ6JSz54oSZLq52NfxEDex75IklQve6JSrlgs0j/oTJQkSfXa1hM1aBGVSvlCkSL42BdJkurUta0nyuW8VKqs43a6nCdJUl2ymQy5bMa789KqknhnoiRJql9XziIqtSrNcJ1ucSBJUt26clmX89Jq+0yUy3mSJNWrM5d1JiqtBsrP+3E5T5Kk+nXlMm5xkFaV6tktDiRJql9nLusDiNPKxnJJkhrX1eFyXmptn4myJ0qSpHrZE5Vi9kRJktS4duyJ6hjtAyGELPAV4GCgDzgnxriiavxC4EPAi+W3zo8xxibEmqh+tziQJKlhnbksWwdSVkQBpwFTY4xHhhCOAC4BTq0aXwR8MMa4vBkBtooBtziQJKlhXbksm7YOJh3GuKplWuVo4EaAGOO9wJIh44uBT4UQ7gwhfGqc42sZ2x/74kyUJEn16mzDHctrmYmaBWyoep0PIXTEGCvl5LXAl4GNwLIQwrtjjD8c6WC5XIbu7ukNB7wjuVy2acfumFI6VT1zd6K7e1pTfke7a2Z+NHbmp7WZn9ZmfkY3Y1oX+Ze2JHKempWfWoqojcDMqtfZSgEVQsgAl8cYN5Rf/wg4BBixiMrni/T2bm484h3o7p7etGNv2LQVgC2v9NFLe+1zMVGamR+NnflpbeantZmfGhQKbB3IJ3KexpKfnp6ZI47VsjZ1F3AyQLkn6pGqsVnAoyGEGeWC6nigLXujKst59kRJklS/0rPz0rectww4IYRwN5ABzgohnAHMiDFeEUK4CLiN0p17t8QYf9y8cJMzMOhmm5IkNSqVPVExxgJwwZC3f1c1fhVw1TjH1XJ87IskSY0rzUS1VzuMFUGNBvIFchnIZV3OkySpXp0dWfoHCxSL7VNIWUTVqD9fdBZKkqQGdeUyFIF8wSIqdQbyBbrcrVySpIZUeor722hJz6qgRv35gjNRkiQ1qHNbEdU+zeVWBTXqzxfd3kCSpAZV/oa20zYHFlE1Ghh0JkqSpEY5E5Vi/fmCe0RJktSgbT1Rg/ZEpU6pJ8rlPEmSGtHZ4UxUapV6ojxdkiQ1wp6oFBsYLGyroiVJUn3siUqxUk+Uy3mSJDWispozYE9U+gy4nCdJUsMqExHORKWQd+dJktS4ylM/7IlKoYG8PVGSJDWq08e+pJc7lkuS1LguG8vTa8DlPEmSGuYWBynW72NfJElqmMt5KTbgFgeSJDVsW2P5oDNRqZIvFMkXcSZKkqQGudlmSlXWb+2JkiSpMR3ZDNmMPVGpU6ma3eJAkqTGdeay9kSlTSXh9kRJktS4rlzWmai06S83wdkTJUlS4zpzGfpsLE+XfnuiJEkaM2eiUmh7Y7nLeZIkNaqrw56o1Kkk3OU8SZIa15nLOBOVNpWNwVzOkySpcV25rPtEpc32LQ5czpMkqVFucZBCA9u2OPB0SZLUqK5cxse+pI1350mSNHalxvL2KaI6RvtACCELfAU4GOgDzokxrqgaPwX4LDAILI0xXtmkWBPjY18kSRq70hYH7bOcN2oRBZwGTI0xHhlCOAK4BDgVIITQCVwGHAq8AtwVQrghxrimWQHX4rn1W1i/uX/cjvfkus2APVGSJI1FZy7Lpr5Bfr1qw5iPlclkeP3uM+nIJve3uZYi6mjgRoAY470hhCVVYwcBK2KM6wFCCHcCxwDX
jXegtdqwZYD3Lv0V413nZoCdumo5XZIkaTizpnbwwqY+zrn21+NyvA8fuw8fOHT+uByrEbVUBbOA6pIxH0LoiDEODjO2CZi9o4Plchm6u6fXHWgtcrksC/aYzff+8ijWvTJ+M1EAO+/UxcI9Zo3rMdMml8s2LfcaO/PT2sxPazM/tfn7d7+eP3rLXuNyrGwGliyYy5SO0VttmpWfWoqojcDMqtfZcgE13NhMoHdHB8vni/T2bq4ryFp1d0+nt3cze07rYM9p4z9r1Ky406KSH7Um89PazE9rMz+1e9Mu41fMbHl5K1tq+NxY8tPTM3PEsVo6pe8CTgYo90Q9UjX2GHBACGFuCKELOBa4p6EoJUmSJpFMsbjj7qGqu/PeTKk16CxgETAjxnhF1d15WUp35315lN/5IvDsWAOXJEmaAAuAnuEGRi2iJEmS9FpufCRJktQAiyhJkqQGWERJkiQ1wCJKkiSpARZRkiRJDbCIkiRJakBbPAyuai+rg4E+4JwY44pko1II4UG2PxboaeCfgG8AReBR4K9ijIVkokuvEMLhwBdijMeFEPZnmJyEEM4FzgcGgc/FGH+YWMApMyQ/i4AbgCfKw1+NMX7b/Ey8EEInsBRYCEwBPgf8Fq+fljBCflbS5OunXWaiTgOmxhiPBD4JXJJwPKkXQpgKEGM8rvy/s4BLgU/HGI+htHHrqUnGmEYhhL8DvgZMLb/1mpyEEHYHPgy8FTgR+HwIYUoS8abNMPlZBFxadR192/wk5v3AuvK1chLwJbx+Wslw+Wn69dMWM1HA0cCNADHGe0MISxKOR6VZwekhhJspfc8uAhYDd5THfwK8C1iWTHip9STwHuCq8uvhcpIH7oox9gF9IYQVlJ5Y8KsJjjWNhstPCCGcSum/pv8WOAzzk4TrgO9WvR7E66eVjJSfpl4/7TITNYvty0YA+RBCuxSIk9Vm4GJKlf4FwNVAJsZY2SJ/EzA7odhSK8Z4PTBQ9dZwORl6PZmrCTJMfu4DPh5jPBZ4CvgHzE8iYowvxxg3hRBmUvpj/Wm8flrGCPlp+vXTLkXURqD6McvZGONgUsEIgMeBb8YYizHGx4F1wG5V4zOB3kQiU7XqnrRKToZeT+YqOctijMsrPwOHYH4SE0KYD9wGXBVjvAavn5YyTH6afv20SxF1F3AyQAjhCOCRZMMRcDbl3rQQwp6Uqv+bQwjHlcdPAn6RTGiq8uAwObkPOCaEMDWEMBs4iFLTrCbeTSGEw8o/vwNYjvlJRAhhN+Bm4BMxxqXlt71+WsQI+Wn69dMuS17LgBNCCHdTau47K+F4BF8HvhFCuJPSnStnA2uBK0MIXcBjvHr9Wsn4GENyEmPMhxC+SOkPQhb4+xjj1iSDTLG/AL4UQugH1gDnxRg3mp9EXATMAT4TQvhM+b2PAF/0+mkJw+XnQuDyZl4/mWKxOPqnJEmS9CrtspwnSZI0oSZ8Oa9QKBTz+ebMfuVyGZp1bI2d+Wlt5qe1mZ/WZn5a21jy09mZWwv0DDdWUxFVvYPukPdPAT5LaT+GpTHGK0c7Vj5fpLd3cy2/tm7d3dObdmyNnflpbeantZmf1mZ+WttY8tPTM/PZkcZGLaLKO+h+AHhlyPudwGXAoeWxu0IIN8QY1zQU5SSzaesgWwfzSYcxqfRls2x8uS/pMDQC89PazE9rMz+1mz21k66O9ugmqmUmaugOuhUHAStijOsByndhHUNp19C29sy6zfzJf9xPwZlbSZLq8sY9ZvL/zjgk6TDGxahFVIzx+hDCwmGGGtr1M5fL0N09veYA65HLZZt27GqvrNtMoQjnHL0PC+Y2//e1i2w2Q8HKs2WZn9Zmflqb+anN93+9mude2jwhf6urNas+GEtjeUO7frZDT9T6DVsAOHrv2bxhj1lN/33twp6B1mZ+Wpv5aW3mpzYPP7eeJ17YNOHnaow9USOOjaWIegw4IIQwF3gZOJbSs9LaXn+5w78z1x5rupIkTYTOXIb+
fGH0D04SdRdRIYQzgBkxxitCCBcCN1Hab2ppjHHVeAfYigYGS1+ALosoSZJq1pXLbpuIaAc1FVExxmeAI8o/X1P1/g3ADU2JrIVVqujOjkzCkUiSNHl05bLkC0UKxSLZzOT/G+pUSgMG8s5ESZJUr85cqXDqH2yPJT2rgAb02RMlSVLdKvtDtUtflFVAA+yJkiSpfpXJh3bpi7IKaED/tuW8yb+eK0nSRKn83RxwJiq9BvIFMkAuaxElSVKtts1E2ROVXv35Il0dWTJtcGeBJEkTpdIGM+ByXnoN5Avb7jCQJEm12d4T5UxUavXnCzaVS5JUp64Oe6JSrz9ftIiSJKlOXc5EaWCwsG2vC0mSVJsutzhQvz1RkiTVbVtjuXfnpdeAy3mSJNWt8sxZl/NSrDQT5amTJKkebnEgBvIFdyuXJKlObnEg+vNFZ6IkSaqTj31ReSbKUydJUj18ALHoH7QnSpKkem3viXImKrUG8oVtu65KkqTaVLYH8gHEKdZnT5QkSXXLZDJ05jI2lqeZPVGSJDWmK5e1JyrNSj1RLudJklSvzlzWnqg0cyZKkqTGdOUy9kSlVb5QJF/EIkqSpAZ05rL2RKVVZQrS5TxJkurXlcv62Je0qlTPXR2eOkmS6uXdeSlWuaPA5TxJkuo3pcPG8tSqJN4iSpKk+nW6xUF6Ve4o6HTHckmS6tblFgfpNeByniRJDet0i4P06t92d56nTpKkenV1tM/deR2jfSCEkAW+AhwM9AHnxBhXVI1fCHwIeLH81vkxxtiEWFvC9p4ol/MkSapXO+0TNWoRBZwGTI0xHhlCOAK4BDi1anwR8MEY4/JmBNhqnImSJKlxXblM2/RE1VJEHQ3cCBBjvDeEsGTI+GLgUyGE3YEfxRg/v6OD5XIZurunNxTsaHK5bNOOXdH1h1cAmNs9vem/q91MRH7UOPPT2sxPazM/tZsxvYuBQnFCz1ez8lNLETUL2FD1Oh9C6IgxDpZfXwt8GdgILAshvDvG+MORDpbPF+nt3dxwwDvS3T29aceuWL9hCwD9m/ub/rvazUTkR40zP63N/LQ281O74mCB/sHChJ6vseSnp2fmiGO1rEltBKqPkK0UUCGEDHB5jHFtjLEf+BFwSENRThLblvPc4kCSpLq1U09ULUXUXcDJAOWeqEeqxmYBj4YQZpQLquOBtu6N6nezTUmSGlbqiSpSKE7+O/RqWc5bBpwQQrgbyABnhRDOAGbEGK8IIVwE3Ebpzr1bYow/bl64yavssmpjuSRJ9as8e3YgX2TKJF/VGbWIijEWgAuGvP27qvGrgKvGOa6WNTDoFgeSJDWqMgkxkC8wpWNyT0hM7ugT4BYHkiQ1rjIJ0Q59UVYCdfKxL5IkNa4yCdEOj36xEqjT9pkol/MkSapXV257T9RkZxFVp4F8gc5chkzGIkqSpHp1upyXXv35okt5kiQ1aErH9sbyyc5qoE4D+YJFlCRJDdrWE+VyXvr0Dxbsh5IkqUFdOWeiUqs/X9i2UZgkSaqPPVEpNpAvukeUJEkNqkxE9A+6nJc6/fZESZLUsE6X89Kr1FhuT5QkSY3o2tZYbhGVOv0u50mS1LDKRIQzUSnkFgeSJDXOLQ5SzC0OJElqnFscpJhbHEiS1LjKRESfDyBOH3uiJElqXJePfUkv786TJKlx2UyGXDZjT1QalXqiPG2SJDWqK5dxJiqNBvJF786TJGkMunJZ+u2JSp/+vDNRkiSNRWcuy4DLeelSKBYZLBTtiZIkaQy6chl3LE+bStXsFgeSJDWuqyNrT1TaVBJuT5QkSY3rzGW9Oy9tKlOP9kRJktS4rlyxKqG3AAAFTUlEQVTW5by0qdxJYE+UJEmNc4uDFLInSpKksevMZekfdDkvVVzOkyRp7GwsT6HtjeUu50mS1KhOe6LSp3IngTNRkiQ1rl16ojpG+0AIIQt8BTgY6APOiTGuqBo/BfgsMAgsjTFe2aRYE+cWB5IkjV2atjg4DZga
YzwS+CRwSWUghNAJXAa8C3gbcF4IYfdmBNoKtvdEuZwnSVKjunLt0RM16kwUcDRwI0CM8d4QwpKqsYOAFTHG9QAhhDuBY4DrxjvQWm0dyPMvP3uCdZsHxv3Y68vH9O48SZIa15nLsGHrIH9z/SMNHyMDnHX43hwyb/b4BVanWoqoWcCGqtf5EEJHjHFwmLFNwA7/3+RyGbq7p9cdaC1yuSyzZ09nawG2NOHp0FO7chx7wC68aeHOzJhSy6lTtVwu27Tca+zMT2szP63N/NTnxDftweNrN4/pb3U2k6FrWmdN571Z+amlEtgIzKx6nS0XUMONzQR6d3SwfL5Ib+/muoKsVXf3dPo29/Evf/S6phy/YnBLP71b+pv6O9pRd/f0puVeY2d+Wpv5aW3mpz5v6tmJK9735nE5Vi3nfSz56emZOeJYLetSdwEnA4QQjgCq594eAw4IIcwNIXQBxwL3NBSlJEnSJFLLTNQy4IQQwt2UlyBDCGcAM2KMV4QQLgRuolSQLY0xrmpeuJIkSa0hUyxO+C2GLwLPTvQvlSRJasACoGe4gSSKKEmSpEnPe/UlSZIaYBElSZLUAIsoSZKkBlhESZIkNcAiSpIkqQEWUZIkSQ1oiwfAhRCywFeAg4E+4JwY44pko1II4UG2P1vxaeCfgG8AReBR4K9ijJP/Md6TTAjhcOALMcbjQgj7M0xOQgjnAucDg8DnYow/TCzglBmSn0XADcAT5eGvxhi/bX4mXgihE1gKLASmAJ8DfovXT0sYIT8rafL10y4zUacBU2OMRwKfBC5JOJ7UCyFMBYgxHlf+31nApcCnY4zHUNr9/tQkY0yjEMLfAV8Dppbfek1OQgi7Ax8G3gqcCHw+hDAliXjTZpj8LAIurbqOvm1+EvN+YF35WjkJ+BJeP61kuPw0/fppi5ko4GjgRoAY470hhCUJx6PSrOD0EMLNlL5nFwGLgTvK4z8B3kXpsUKaOE8C7wGuKr8eLid54K4YYx/QF0JYAbwZ+NUEx5pGw+UnhBBOpfRf038LHIb5ScJ1wHerXg/i9dNKRspPU6+fdpmJmsX2ZSOAfAihXQrEyWozcDGlSv8C4GogE2OsbJG/CZidUGypFWO8Hhioemu4nAy9nszVBBkmP/cBH48xHgs8BfwD5icRMcaXY4ybQggzKf2x/jRePy1jhPw0/fpplyJqIzCz6nU2xjiYVDAC4HHgmzHGYozxcWAdsFvV+EygN5HIVK26J62Sk6HXk7lKzrIY4/LKz8AhmJ/EhBDmA7cBV8UYr8Hrp6UMk5+mXz/tUkTdBZwMEEI4Angk2XAEnE25Ny2EsCel6v/mEMJx5fGTgF8kE5qqPDhMTu4DjgkhTA0hzAYOotQ0q4l3UwjhsPLP7wCWY34SEULYDbgZ+ESMcWn5ba+fFjFCfpp+/bTLktcy4IQQwt2UmvvOSjgewdeBb4QQ7qR058rZwFrgyhBCF/AYr16/VjI+xpCcxBjzIYQvUvqDkAX+Psa4NckgU+wvgC+FEPqBNcB5McaN5icRFwFzgM+EED5Tfu8jwBe9flrCcPm5ELi8mddPplgsjv4pSZIkvUq7LOdJkiRNKIsoSZKkBlhESZIkNcAiSpIkqQEWUZIkSQ2wiJIkSWqARZQkSVID/j/ebguGTYKiWAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 720x1296 with 10 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot the per-residue observed ratio of the first N_PANELS chains, one panel per chain,\n",
    "# and print an ATOM_LEN estimate (observed residues minus observed rows with a chem_comp_id).\n",
    "N_PANELS = 10  # single source of truth for both the figure layout and the group cap\n",
    "groups = pro_dfrm67.groupby(['pdb_id','chain_id', 'entity_id'])\n",
    "# squeeze=False + ravel() keeps `ax` a flat array of Axes even when N_PANELS == 1\n",
    "figure, ax = plt.subplots(N_PANELS, 1, figsize=(10,18), squeeze=False)\n",
    "ax = ax.ravel()\n",
    "ax[-1].set_xlabel('residue_number')\n",
    "# zip() stops at the shorter of (panels, groups): no manual counter or break is needed,\n",
    "# and fewer than N_PANELS groups simply leaves the remaining panels empty.\n",
    "for panel, (chain_key, chain_df) in zip(ax, groups):\n",
    "    label = '%s_%s_%s' % chain_key  # pdb_id, chain_id, entity_id\n",
    "    panel.plot(chain_df.residue_number, chain_df.observed_ratio, label=label)\n",
    "    # rows with a non-null chem_comp_id -- presumably the modified residues (TODO confirm)\n",
    "    mod = chain_df.dropna(subset=['chem_comp_id'])\n",
    "    # count of rows with observed_ratio > 0, minus those that also carry a chem_comp_id\n",
    "    atom_len = len(chain_df[chain_df.observed_ratio.gt(0)]) - len(mod[mod.observed_ratio.gt(0)])\n",
    "    print(f'{label}->atom_len:{atom_len}')\n",
    "    if len(mod.residue_number) > 0:\n",
    "        # overlay markers at the positions of the rows selected above\n",
    "        panel.scatter(mod.residue_number, mod.observed_ratio)\n",
    "    panel.set_ylabel('observed_ratio')\n",
    "    # the label passed to plot() is only visible once a legend is drawn\n",
    "    panel.legend(loc='upper right')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.69s/it]\n",
      "2020-02-20 11:32:20,547 ProcessEntryData INFO 1 ids downloaded in 2.71s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>url</th>\n",
       "      <th>innerKey</th>\n",
       "      <th>key</th>\n",
       "      <th>pdb_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>EDS map</td>\n",
       "      <td>http://www.ebi.ac.uk/pdbe/coordinates/files/3e...</td>\n",
       "      <td>downloads</td>\n",
       "      <td>map</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>EDS difference map</td>\n",
       "      <td>http://www.ebi.ac.uk/pdbe/coordinates/files/3e...</td>\n",
       "      <td>downloads</td>\n",
       "      <td>map</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Assembly composition XML</td>\n",
       "      <td>http://www.ebi.ac.uk/pdbe/static/entry/downloa...</td>\n",
       "      <td>downloads</td>\n",
       "      <td>assembly</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Assembly 1 (mmCIF; gz)</td>\n",
       "      <td>http://www.ebi.ac.uk/pdbe/static/entry/downloa...</td>\n",
       "      <td>downloads</td>\n",
       "      <td>assembly</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Assembly 1 (atom only; mmCIF)</td>\n",
       "      <td>http://www.ebi.ac.uk/pdbe/static/entry/downloa...</td>\n",
       "      <td>downloads</td>\n",
       "      <td>assembly</td>\n",
       "      <td>3eu7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>166</th>\n",
       "      <td>Structure Factors</td>\n",
       "      <td>http://www.ebi.ac.uk/pdbe/entry-files/download...</td>\n",
       "      <td>downloads</td>\n",
       "      <td>PDB</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>167</th>\n",
       "      <td>Archive mmCIF file</td>\n",
       "      <td>http://www.ebi.ac.uk/pdbe/entry-files/1n0w.cif</td>\n",
       "      <td>views</td>\n",
       "      <td>PDB</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>168</th>\n",
       "      <td>Updated mmCIF file</td>\n",
       "      <td>http://www.ebi.ac.uk/pdbe/entry-files/1n0w_upd...</td>\n",
       "      <td>views</td>\n",
       "      <td>PDB</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>169</th>\n",
       "      <td>PDB file</td>\n",
       "      <td>http://www.ebi.ac.uk/pdbe/entry-files/pdb1n0w.ent</td>\n",
       "      <td>views</td>\n",
       "      <td>PDB</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>170</th>\n",
       "      <td>PDB header</td>\n",
       "      <td>http://www.ebi.ac.uk/pdbe/static/entry/1n0w.he...</td>\n",
       "      <td>views</td>\n",
       "      <td>PDB</td>\n",
       "      <td>1n0w</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>171 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                             label  \\\n",
       "0                          EDS map   \n",
       "1               EDS difference map   \n",
       "2         Assembly composition XML   \n",
       "3           Assembly 1 (mmCIF; gz)   \n",
       "4    Assembly 1 (atom only; mmCIF)   \n",
       "..                             ...   \n",
       "166              Structure Factors   \n",
       "167             Archive mmCIF file   \n",
       "168             Updated mmCIF file   \n",
       "169                       PDB file   \n",
       "170                     PDB header   \n",
       "\n",
       "                                                   url   innerKey       key  \\\n",
       "0    http://www.ebi.ac.uk/pdbe/coordinates/files/3e...  downloads       map   \n",
       "1    http://www.ebi.ac.uk/pdbe/coordinates/files/3e...  downloads       map   \n",
       "2    http://www.ebi.ac.uk/pdbe/static/entry/downloa...  downloads  assembly   \n",
       "3    http://www.ebi.ac.uk/pdbe/static/entry/downloa...  downloads  assembly   \n",
       "4    http://www.ebi.ac.uk/pdbe/static/entry/downloa...  downloads  assembly   \n",
       "..                                                 ...        ...       ...   \n",
       "166  http://www.ebi.ac.uk/pdbe/entry-files/download...  downloads       PDB   \n",
       "167     http://www.ebi.ac.uk/pdbe/entry-files/1n0w.cif      views       PDB   \n",
       "168  http://www.ebi.ac.uk/pdbe/entry-files/1n0w_upd...      views       PDB   \n",
       "169  http://www.ebi.ac.uk/pdbe/entry-files/pdb1n0w.ent      views       PDB   \n",
       "170  http://www.ebi.ac.uk/pdbe/static/entry/1n0w.he...      views       PDB   \n",
       "\n",
       "    pdb_id  \n",
       "0     3eu7  \n",
       "1     3eu7  \n",
       "2     3eu7  \n",
       "3     3eu7  \n",
       "4     3eu7  \n",
       "..     ...  \n",
       "166   1n0w  \n",
       "167   1n0w  \n",
       "168   1n0w  \n",
       "169   1n0w  \n",
       "170   1n0w  \n",
       "\n",
       "[171 rows x 5 columns]"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dfrm8 = ProcessEntryData.main(\n",
    "    pdb_col='pdb_id',\n",
    "    filters={'sifts_range_tage': ('eq', 'Safe'), 'delete': ('ne', True)},\n",
    "    dfrm=dfrm,\n",
    "    suffix='pdb/entry/files/',\n",
    "    method='post',\n",
    "    folder=r'C:\\GitWorks\\Muta3DMaps\\Muta3DMaps\\test\\data'\n",
    ")\n",
    "dfrm8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the two intermediate TSV tables that the following cells compare.\n",
    "path1 = r'C:\OmicData\LiGroupWork\2020_02_20_test\temp_1.tsv'\n",
    "path2 = r'C:\OmicData\LiGroupWork\2020_02_20_test\temp_2.tsv'\n",
    "# Read pdb_id as str in BOTH tables: without the converter read_csv infers the\n",
    "# dtype per file, so the merge / set comparison on pdb_id could silently\n",
    "# compare values of different types.\n",
    "dfrm_test_1 = pd.read_csv(path1, sep='\\t', converters={'pdb_id': str})\n",
    "dfrm_test_2 = pd.read_csv(path2, sep='\\t', converters={'pdb_id': str})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>author_insertion_code</th>\n",
       "      <th>author_residue_number</th>\n",
       "      <th>multiple_conformers</th>\n",
       "      <th>observed_ratio</th>\n",
       "      <th>residue_name</th>\n",
       "      <th>residue_number</th>\n",
       "      <th>chain_id</th>\n",
       "      <th>struct_asym_id</th>\n",
       "      <th>entity_id</th>\n",
       "      <th>pdb_id</th>\n",
       "      <th>chem_comp_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [author_insertion_code, author_residue_number, multiple_conformers, observed_ratio, residue_name, residue_number, chain_id, struct_asym_id, entity_id, pdb_id, chem_comp_id]\n",
       "Index: []"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Default merge: inner join on every column the two tables share.\n",
    "# An empty result means no row is common to both tables.\n",
    "dfrm_test_1.merge(dfrm_test_2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Distinct pdb_id values found in the first test table.\n",
    "set1 = set(dfrm_test_1['pdb_id'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Distinct pdb_id values found in the second test table.\n",
    "set2 = set(dfrm_test_2['pdb_id'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "set()"
      ]
     },
     "execution_count": 81,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# pdb_id values present in both tables; an empty set means they are disjoint.\n",
    "set1.intersection(set2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reference collection of 50 PDB ids, compared against set1 in the next cell.\n",
    "set_cur = {\n",
    "    '2esm', '2pcx', '5kkt', '5wne', '4yve', '3f7p', '5hvu', '2b9e', '4ibw', '4q58',\n",
    "    '3igl', '5f5p', '3d09', '2etr', '4lof', '2n03', '2bip', '2etk', '2v55', '2odu',\n",
    "    '3igk', '5kks', '3d9v', '2biq', '1mb8', '4yvc', '5uzj', '3v8s', '1s1c', '3d05',\n",
    "    '5wnf', '3ndm', '3twj', '4ibv', '4w7p', '3d06', '2bin', '4mzi', '3ncz', '2fej',\n",
    "    '3tv7', '5wng', '3o0z', '3d08', '2bio', '5wnh', '4l2w', '2odv', '5bml', '4gdo',\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# True when the first table holds exactly the reference ids in set_cur.\n",
    "set1 == set_cur"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
